commit a126becea27945ea9b961a41958d6d4dbdf93dff
parent 3eb1bba007c88ed0e45f9234967602b1f2d56b8c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 23 May 2026 10:03:00 -0700
cg: extend memory ops with effective-address rider
Fold base + index*scale + offset directly onto loads and stores. Adds
CfreeCgEffAddr to the public API; cfree_cg_load/store now take an EA
alongside CfreeCgMemAccess (which also gains bit-field metadata). Drops
the old standalone cfree_cg_index / cfree_cg_field / cfree_cg_addr_offset
/ cfree_cg_indirect / cfree_cg_push_symbol_lvalue helpers — frontends
ride field offsets in ea.offset and array scales in ea.scale.
Operand.ind grows an index register and log2_scale. x64 emits SIB
natively; rv64/aa64/c_target fold via the new arch_lower_indexed helper.
opt threads OPT_USE_INDIRECT_INDEX through def-use, copy-prop, emit,
analysis, hard-live, inline, ssa, and the addr-xform/GVN paths in
pass_o2 (zero-EA uses still fold to OPK_LOCAL; EA-shaped uses keep the
IR_ADDR_OF alive). C, toy, and wasm frontends migrated; tests updated.
Diffstat:
50 files changed, 3211 insertions(+), 1305 deletions(-)
diff --git a/include/cfree/cg.h b/include/cfree/cg.h
@@ -249,8 +249,28 @@ typedef struct CfreeCgMemAccess {
uint32_t flags; /* CfreeCgMemAccessFlag */
uint32_t alias_scope;
uint32_t noalias_scope;
+ /* Bit-field metadata. When bit_width != 0 the memop performs a bit-field
+ * access at bit range [bit_offset, bit_offset + bit_width) within a
+ * storage_size-byte container located at the EA. */
+ uint16_t bit_offset;
+ uint16_t bit_width;
+ uint32_t storage_size;
+ int bit_signed;
} CfreeCgMemAccess;
+/* Effective address rider on the memops: encodes
+ * base + index * scale + offset directly on the load/store.
+ *
+ * scale == 0 indicates "no index"; the memop consumes only [base].
+ * scale > 0 indicates an indexed access; the memop consumes [base, index]
+ * and multiplies index by scale, given in raw bytes (frontends pass the
+ * element size itself, not its log2). offset is a signed byte displacement
+ * folded into the addressing mode where the target permits. */
+typedef struct CfreeCgEffAddr {
+ int64_t offset;
+ uint32_t scale;
+} CfreeCgEffAddr;
+
/* ============================================================
* Declarations and Symbols
* ============================================================ */
@@ -400,7 +420,6 @@ void cfree_cg_func_end(CfreeCg*);
typedef enum CfreeCgLocalFlag {
CFREE_CG_LOCALFLAG_NONE = 0,
- CFREE_CG_LOCAL_ADDRESS_TAKEN = 1u << 0,
CFREE_CG_LOCAL_ARTIFICIAL = 1u << 1,
CFREE_CG_LOCAL_OPTIMIZED_OUT = 1u << 2,
CFREE_CG_LOCAL_COMPILER_TEMP = 1u << 3,
@@ -517,6 +536,7 @@ void cfree_cg_unreachable(CfreeCg*);
* ============================================================ */
void cfree_cg_dup(CfreeCg*);
+void cfree_cg_dup2(CfreeCg*); /* duplicates the top two slots */
void cfree_cg_swap(CfreeCg*);
void cfree_cg_drop(CfreeCg*);
void cfree_cg_rot3(CfreeCg*); /* [..., a, b, c] -> [..., b, c, a] */
@@ -543,32 +563,27 @@ CfreeCgSym cfree_cg_const_data(CfreeCg*, const uint8_t* data, size_t len,
* or equivalent lowering from the symbol attrs and output mode. */
void cfree_cg_push_symbol_addr(CfreeCg*, CfreeCgSym sym, int64_t addend);
-/* Pushes an lvalue backed by sym + addend. For TLS objects this materializes
- * the current thread's instance address as needed and then treats it as an
- * indirect lvalue. */
-void cfree_cg_push_symbol_lvalue(CfreeCg*, CfreeCgSym sym, int64_t addend);
-
-/* Pops a pointer rvalue or lvalue address and pushes address + byte_offset as
- * the requested result pointer/lvalue type. This is the generic primitive for
- * frontend-owned aggregate layouts and non-standard record field offsets. */
-void cfree_cg_addr_offset(CfreeCg*, int64_t byte_offset,
- CfreeCgTypeId result_type);
-
-/* Computes base + offset + index * element-size and pushes the element lvalue.
- * Stack is [base, index]. The element size comes from the base pointer/array
- * type and the access descriptor used by the eventual memory operation. */
-void cfree_cg_index(CfreeCg*, uint64_t offset);
-
-/* Pops a record lvalue and pushes the field lvalue. Offset is inferred from
- * the record type and field_index. Use cfree_cg_addr after this when an
- * address is required. */
-void cfree_cg_field(CfreeCg*, uint32_t field_index);
-
-/* Converts a pointer rvalue TOS from *T to an lvalue T. */
-void cfree_cg_indirect(CfreeCg*);
-void cfree_cg_load(CfreeCg*, CfreeCgMemAccess access);
+/* Projects an lvalue TOS back to a pointer rvalue (e.g. for `&x`, passing to
+ * a call, escape). */
void cfree_cg_addr(CfreeCg*);
-void cfree_cg_store(CfreeCg*, CfreeCgMemAccess access); /* [lv, rv] -> [] */
+
+/* Single load/store ops with an effective-address rider.
+ *
+ * `base` is either an lvalue (push_local) or a pointer-typed value. When
+ * `ea.scale == 0` the memop consumes only [base]; when `ea.scale > 0` it
+ * also pops an integer-typed `index` (with signedness inherited from its
+ * producer). Field offsets ride in `ea.offset`; array scales ride in
+ * `ea.scale` as raw bytes.
+ *
+ * Stack effects:
+ * scale == 0:
+ * load: [base] -> [value]
+ * store: [base, value] -> []
+ * scale > 0:
+ * load: [base, index] -> [value]
+ * store: [base, index, value] -> [] */
+void cfree_cg_load(CfreeCg*, CfreeCgMemAccess access, CfreeCgEffAddr ea);
+void cfree_cg_store(CfreeCg*, CfreeCgMemAccess access, CfreeCgEffAddr ea);
/* ============================================================
* ABI variadic argument access
@@ -1053,28 +1068,46 @@ static inline void cfree_cg_musttail_call_symbol(CfreeCg* cg, CfreeCgSym sym,
cfree_cg_call_symbol(cg, sym, nargs, attrs);
}
+/* Read the scalar value of a local. Stack: [] -> [value]. */
+static inline void cfree_cg_local_read(CfreeCg* cg, CfreeCgLocal local,
+ CfreeCgMemAccess access) {
+ CfreeCgEffAddr ea = {0, 0};
+ cfree_cg_push_local(cg, local);
+ cfree_cg_load(cg, access, ea);
+}
+
+/* Write the scalar value on TOS into a local. Stack: [value] -> []. */
+static inline void cfree_cg_local_write(CfreeCg* cg, CfreeCgLocal local,
+ CfreeCgMemAccess access) {
+ CfreeCgEffAddr ea = {0, 0};
+ cfree_cg_push_local(cg, local); /* [value, lv] */
+ cfree_cg_swap(cg); /* [lv, value] */
+ cfree_cg_store(cg, access, ea);
+}
+
/* Increment/decrement an lvalue in place. Stack: [lv] -> [result].
* post=1 pushes the old value; post=0 pushes the new value.
* op is CFREE_CG_INT_ADD or CFREE_CG_INT_SUB. ty is the promoted integer type
* of the lvalue. */
static inline void cfree_cg_inc_dec(CfreeCg* cg, CfreeCgIntBinOp op, int post,
CfreeCgTypeId ty, CfreeCgMemAccess access) {
- cfree_cg_dup(cg); /* [lv, lv] */
- cfree_cg_load(cg, access); /* [lv, old] */
+ CfreeCgEffAddr ea = {0, 0};
+ cfree_cg_dup(cg); /* [lv, lv] */
+ cfree_cg_load(cg, access, ea); /* [lv, old] */
if (post) {
- cfree_cg_dup(cg); /* [lv, old, old] */
- cfree_cg_push_int(cg, 1, ty); /* [lv, old, old, 1] */
- cfree_cg_int_binop(cg, op, 0); /* [lv, old, new] */
- cfree_cg_rot3(cg); /* [old, new, lv] */
- cfree_cg_swap(cg); /* [old, lv, new] */
- cfree_cg_store(cg, access); /* [old] */
+ cfree_cg_dup(cg); /* [lv, old, old] */
+ cfree_cg_push_int(cg, 1, ty); /* [lv, old, old, 1] */
+ cfree_cg_int_binop(cg, op, 0); /* [lv, old, new] */
+ cfree_cg_rot3(cg); /* [old, new, lv] */
+ cfree_cg_swap(cg); /* [old, lv, new] */
+ cfree_cg_store(cg, access, ea); /* [old] */
} else {
- cfree_cg_push_int(cg, 1, ty); /* [lv, old, 1] */
- cfree_cg_int_binop(cg, op, 0); /* [lv, new] */
- cfree_cg_dup(cg); /* [lv, new, new] */
- cfree_cg_rot3(cg); /* [new, new, lv] */
- cfree_cg_swap(cg); /* [new, lv, new] */
- cfree_cg_store(cg, access); /* [new] */
+ cfree_cg_push_int(cg, 1, ty); /* [lv, old, 1] */
+ cfree_cg_int_binop(cg, op, 0); /* [lv, new] */
+ cfree_cg_dup(cg); /* [lv, new, new] */
+ cfree_cg_rot3(cg); /* [new, new, lv] */
+ cfree_cg_swap(cg); /* [new, lv, new] */
+ cfree_cg_store(cg, access, ea); /* [new] */
}
}
diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c
@@ -38,9 +38,22 @@ CfreeCgMemAccess pcg_mem(Parser* p, const Type* ty) {
return m;
}
+static void pcg_aux_clear(PcgLvAux* a) {
+ a->offset = 0;
+ a->scale = 0;
+ a->bit_offset = 0;
+ a->bit_width = 0;
+ a->storage_size = 0;
+ a->bit_signed = 0;
+ a->base_kind = PCG_LV_BASE_LOCAL;
+ a->pad[0] = a->pad[1] = a->pad[2] = a->pad[3] = 0;
+ a->pad[4] = a->pad[5] = 0;
+}
+
static void pcg_stack_grow(Parser* p, u32 want) {
const Type** ns;
u8* nf;
+ PcgLvAux* na;
u32 nc;
if (p->cg_type_cap >= want) return;
nc = p->cg_type_cap ? p->cg_type_cap * 2u : 64u;
@@ -49,14 +62,20 @@ static void pcg_stack_grow(Parser* p, u32 want) {
if (!ns) perr(p, "out of memory in CG type stack");
nf = arena_zarray(p->pool->arena, u8, nc);
if (!nf) perr(p, "out of memory in CG value stack");
+ na = arena_zarray(p->pool->arena, PcgLvAux, nc);
+ if (!na) perr(p, "out of memory in CG lvalue aux stack");
if (p->cg_type_stack && p->cg_type_sp) {
memcpy(ns, p->cg_type_stack, sizeof(*ns) * p->cg_type_sp);
}
if (p->cg_value_flags && p->cg_type_sp) {
memcpy(nf, p->cg_value_flags, sizeof(*nf) * p->cg_type_sp);
}
+ if (p->cg_lv_aux && p->cg_type_sp) {
+ memcpy(na, p->cg_lv_aux, sizeof(*na) * p->cg_type_sp);
+ }
p->cg_type_stack = ns;
p->cg_value_flags = nf;
+ p->cg_lv_aux = na;
p->cg_type_cap = nc;
}
@@ -64,6 +83,7 @@ void pcg_push_type(Parser* p, const Type* ty) {
pcg_stack_grow(p, p->cg_type_sp + 1u);
p->cg_type_stack[p->cg_type_sp] = ty;
p->cg_value_flags[p->cg_type_sp] = 0;
+ pcg_aux_clear(&p->cg_lv_aux[p->cg_type_sp]);
++p->cg_type_sp;
}
@@ -74,19 +94,28 @@ void pcg_drop_type(Parser* p) {
void pcg_dup_type(Parser* p) {
const Type* ty = pcg_top_type(p);
u8 flags = p->cg_type_sp ? p->cg_value_flags[p->cg_type_sp - 1u] : 0;
+ PcgLvAux aux;
+ if (p->cg_type_sp) aux = p->cg_lv_aux[p->cg_type_sp - 1u];
+ else pcg_aux_clear(&aux);
pcg_push_type(p, ty);
- if (p->cg_type_sp) p->cg_value_flags[p->cg_type_sp - 1u] = flags;
+ if (p->cg_type_sp) {
+ p->cg_value_flags[p->cg_type_sp - 1u] = flags;
+ p->cg_lv_aux[p->cg_type_sp - 1u] = aux;
+ }
}
void pcg_swap_type(Parser* p) {
if (p->cg_type_sp >= 2) {
const Type* a = p->cg_type_stack[p->cg_type_sp - 1u];
u8 af = p->cg_value_flags[p->cg_type_sp - 1u];
+ PcgLvAux ax = p->cg_lv_aux[p->cg_type_sp - 1u];
p->cg_type_stack[p->cg_type_sp - 1u] = p->cg_type_stack[p->cg_type_sp - 2u];
p->cg_value_flags[p->cg_type_sp - 1u] =
p->cg_value_flags[p->cg_type_sp - 2u];
+ p->cg_lv_aux[p->cg_type_sp - 1u] = p->cg_lv_aux[p->cg_type_sp - 2u];
p->cg_type_stack[p->cg_type_sp - 2u] = a;
p->cg_value_flags[p->cg_type_sp - 2u] = af;
+ p->cg_lv_aux[p->cg_type_sp - 2u] = ax;
}
}
@@ -94,17 +123,30 @@ void pcg_rot3_type(Parser* p) {
if (p->cg_type_sp >= 3) {
const Type* a = p->cg_type_stack[p->cg_type_sp - 3u];
u8 af = p->cg_value_flags[p->cg_type_sp - 3u];
+ PcgLvAux ax = p->cg_lv_aux[p->cg_type_sp - 3u];
p->cg_type_stack[p->cg_type_sp - 3u] = p->cg_type_stack[p->cg_type_sp - 2u];
p->cg_value_flags[p->cg_type_sp - 3u] =
p->cg_value_flags[p->cg_type_sp - 2u];
+ p->cg_lv_aux[p->cg_type_sp - 3u] = p->cg_lv_aux[p->cg_type_sp - 2u];
p->cg_type_stack[p->cg_type_sp - 2u] = p->cg_type_stack[p->cg_type_sp - 1u];
p->cg_value_flags[p->cg_type_sp - 2u] =
p->cg_value_flags[p->cg_type_sp - 1u];
+ p->cg_lv_aux[p->cg_type_sp - 2u] = p->cg_lv_aux[p->cg_type_sp - 1u];
p->cg_type_stack[p->cg_type_sp - 1u] = a;
p->cg_value_flags[p->cg_type_sp - 1u] = af;
+ p->cg_lv_aux[p->cg_type_sp - 1u] = ax;
}
}
+PcgLvAux* pcg_top_lv_aux(Parser* p) {
+ return p->cg_type_sp ? &p->cg_lv_aux[p->cg_type_sp - 1u] : NULL;
+}
+
+PcgLvAux* pcg_lv_aux_at(Parser* p, u32 depth) {
+ return (p->cg_type_sp > depth) ? &p->cg_lv_aux[p->cg_type_sp - 1u - depth]
+ : NULL;
+}
+
const Type* pcg_top_type(Parser* p) {
return p->cg_type_sp ? p->cg_type_stack[p->cg_type_sp - 1u] : NULL;
}
@@ -117,6 +159,7 @@ void pcg_retag_top(Parser* p, const Type* ty) {
if (p->cg_type_sp) {
p->cg_type_stack[p->cg_type_sp - 1u] = ty;
p->cg_value_flags[p->cg_type_sp - 1u] = 0;
+ pcg_aux_clear(&p->cg_lv_aux[p->cg_type_sp - 1u]);
}
}
@@ -319,7 +362,10 @@ FrameSlot pcg_local(Parser* p, const FrameSlotDesc* fsd) {
if (!pcg_emit_enabled(p)) return FRAME_SLOT_NONE;
attrs.name = fsd->name;
attrs.align = fsd->align;
- if (fsd->flags & FSF_ADDR_TAKEN) attrs.flags |= CFREE_CG_LOCAL_ADDRESS_TAKEN;
+ /* FSF_ADDR_TAKEN is no longer propagated to CG: there is no
+ * CFREE_CG_LOCAL_ADDRESS_TAKEN attribute. The C-side flag stays for any
+ * parser-internal uses; opt's opt_promote_scalar_locals (Stream I) decides
+ * register-promotion from observed access patterns, not from the flag. */
return cfree_cg_local(p->cg, pcg_tid(p, fsd->type), attrs);
}
@@ -329,7 +375,6 @@ FrameSlot pcg_param_slot(Parser* p, u32 index, const FrameSlotDesc* fsd) {
memset(&attrs, 0, sizeof attrs);
attrs.name = fsd->name;
attrs.align = fsd->align;
- if (fsd->flags & FSF_ADDR_TAKEN) attrs.flags |= CFREE_CG_LOCAL_ADDRESS_TAKEN;
return cfree_cg_param(p->cg, index, pcg_tid(p, fsd->type), attrs);
}
@@ -363,54 +408,230 @@ void pcg_push_float(Parser* p, double v, const Type* ty) {
pcg_push_type(p, ty);
}
+/* Fill `access` (CfreeCgMemAccess) and `ea` (CfreeCgEffAddr) for a memop
+ * against the TOS lvalue. The pending EA is only read from aux, not cleared;
+ * the caller must follow with the matching cfree_cg_load / cfree_cg_store. */
+static void pcg_consume_ea_for_top(Parser* p, const Type* access_ty,
+ CfreeCgMemAccess* access,
+ CfreeCgEffAddr* ea) {
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ *access = pcg_mem(p, access_ty);
+ if (lv && lv->bit_width) {
+ access->bit_offset = lv->bit_offset;
+ access->bit_width = lv->bit_width;
+ access->storage_size = lv->storage_size;
+ access->bit_signed = lv->bit_signed;
+ }
+ ea->offset = lv ? lv->offset : 0;
+ ea->scale = lv ? lv->scale : 0u;
+}
+
void pcg_push_local_typed(Parser* p, FrameSlot s, const Type* ty) {
if (pcg_emit_enabled(p)) cfree_cg_push_local(p->cg, s);
pcg_push_type(p, ty);
- if (p->cg_type_sp)
+ if (p->cg_type_sp) {
p->cg_value_flags[p->cg_type_sp - 1u] = pcg_lvalue_flags_for_type(ty);
+ p->cg_lv_aux[p->cg_type_sp - 1u].base_kind = PCG_LV_BASE_LOCAL;
+ }
}
void pcg_push_global(Parser* p, ObjSymId sym, const Type* ty) {
- if (pcg_emit_enabled(p)) cfree_cg_push_symbol_lvalue(p->cg, sym, 0);
+ /* push_symbol_addr produces a pointer rvalue; the parser tags the slot as
+ * a C-language lvalue with PCG_LV_BASE_POINTER_RV so subsequent
+ * load/store/addr know the base is already a pointer. The cg layer accepts
+ * pointer-rvalue bases for memops uniformly (Stream A). */
+ if (pcg_emit_enabled(p)) cfree_cg_push_symbol_addr(p->cg, sym, 0);
pcg_push_type(p, ty);
- if (p->cg_type_sp)
+ if (p->cg_type_sp) {
p->cg_value_flags[p->cg_type_sp - 1u] = pcg_lvalue_flags_for_type(ty);
+ p->cg_lv_aux[p->cg_type_sp - 1u].base_kind = PCG_LV_BASE_POINTER_RV;
+ }
}
void pcg_load(Parser* p) {
+ const Type* ty = pcg_top_type(p);
int was_lvalue = pcg_top_is_lvalue(p);
- if (pcg_emit_enabled(p)) cfree_cg_load(p->cg, pcg_mem(p, pcg_top_type(p)));
- if (was_lvalue && p->cg_type_sp) p->cg_value_flags[p->cg_type_sp - 1u] = 0;
+ if (pcg_emit_enabled(p)) {
+ CfreeCgMemAccess access;
+ CfreeCgEffAddr ea;
+ pcg_consume_ea_for_top(p, ty, &access, &ea);
+ cfree_cg_load(p->cg, access, ea);
+ }
+ if (was_lvalue && p->cg_type_sp) {
+ p->cg_value_flags[p->cg_type_sp - 1u] = 0;
+ pcg_aux_clear(&p->cg_lv_aux[p->cg_type_sp - 1u]);
+ }
+}
+
+/* Materialize the pending EA on the TOS lvalue as a pointer rvalue.
+ * Postcondition: TOS is a pointer rvalue of type result_ptr_ty (which the
+ * caller has computed as type_ptr(pool, current_lv_type)) and the CG stack
+ * holds that single pointer where the lvalue's [base] or [base, index] used
+ * to be. Aux is cleared.
+ *
+ * The materialization sequence depends on the aux:
+ * base_kind == LOCAL:
+ * - scale == 0, offset == 0: addr
+ * - scale == 0, offset != 0: addr ; ptr_to_int ; +offset ; int_to_ptr
+ * - scale != 0: addr ; ptr_to_int ; idx*scale + ofs ; int_to_ptr
+ * base_kind == POINTER_RV:
+ * - scale == 0, offset == 0: no-op
+ * - scale == 0, offset != 0: ptr_to_int ; +offset ; int_to_ptr
+ * - scale != 0: ptr_to_int ; idx*scale + ofs ; int_to_ptr */
+static void pcg_materialize_lv_to_ptr(Parser* p, const Type* result_ptr_ty) {
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ int emit = pcg_emit_enabled(p);
+ PcgLvBaseKind base_kind = lv ? (PcgLvBaseKind)lv->base_kind
+ : PCG_LV_BASE_LOCAL;
+ i64 ofs = lv ? lv->offset : 0;
+ u32 scale = lv ? lv->scale : 0u;
+ const Type* idx_ty = c_abi_ptrdiff_type(p->abi, p->pool);
+ CfreeCgTypeId idx_tid = pcg_tid(p, idx_ty);
+ CfreeCgTypeId ptr_tid = pcg_tid(p, result_ptr_ty);
+ if (scale == 0 && ofs == 0) {
+ if (base_kind == PCG_LV_BASE_LOCAL) {
+ if (emit) cfree_cg_addr(p->cg);
+ }
+ /* Already a pointer with no pending modifiers. */
+ } else if (scale == 0) {
+ if (emit) {
+ if (base_kind == PCG_LV_BASE_LOCAL) cfree_cg_addr(p->cg);
+ cfree_cg_ptr_to_int(p->cg, idx_tid);
+ cfree_cg_push_int(p->cg, (uint64_t)ofs, idx_tid);
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, CFREE_CG_INTOP_NONE);
+ cfree_cg_int_to_ptr(p->cg, ptr_tid);
+ }
+ } else {
+ /* CG stack on entry: [base_ptr_now, index]. Compute
+ * base_ptr_now + index*scale + ofs. */
+ if (emit) {
+ if (base_kind == PCG_LV_BASE_LOCAL) {
+ cfree_cg_swap(p->cg); /* [index, base_lv] */
+ cfree_cg_addr(p->cg); /* [index, base_ptr] */
+ cfree_cg_swap(p->cg); /* [base_ptr, index] */
+ }
+ cfree_cg_swap(p->cg); /* [index, base_ptr] */
+ cfree_cg_ptr_to_int(p->cg, idx_tid);
+ cfree_cg_swap(p->cg); /* [base_int, index] */
+ cfree_cg_push_int(p->cg, (uint64_t)scale, idx_tid);
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_MUL, CFREE_CG_INTOP_NONE);
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, CFREE_CG_INTOP_NONE);
+ if (ofs != 0) {
+ cfree_cg_push_int(p->cg, (uint64_t)ofs, idx_tid);
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, CFREE_CG_INTOP_NONE);
+ }
+ cfree_cg_int_to_ptr(p->cg, ptr_tid);
+ }
+ }
+ pcg_retag_top(p, result_ptr_ty);
+ {
+ PcgLvAux* out = pcg_top_lv_aux(p);
+ if (out) out->base_kind = PCG_LV_BASE_POINTER_RV;
+ }
}
void pcg_addr(Parser* p) {
const Type* ty = pcg_top_type(p);
- if (pcg_emit_enabled(p)) cfree_cg_addr(p->cg);
- pcg_retag_top(p, type_ptr(p->pool, ty));
+ pcg_materialize_lv_to_ptr(p, type_ptr(p->pool, ty));
}
+/* Store [lv, rv] -> [rv]. The expression-value of an assignment is the
+ * assigned rvalue, so the store sequence must leave a copy of rv on TOS. */
void pcg_store(Parser* p) {
const Type* lv_ty = pcg_top2_type(p);
const Type* rv_ty = pcg_top_type(p);
const Type* mem_ty = lv_ty;
int emit = pcg_emit_enabled(p);
+ CfreeCgMemAccess access;
+ CfreeCgEffAddr ea;
+ PcgLvAux* lv;
if (rv_ty && type_is_ptr(rv_ty) && (!lv_ty || !type_is_ptr(lv_ty))) {
mem_ty = rv_ty;
}
- if (emit) cfree_cg_dup(p->cg);
- pcg_dup_type(p);
- if (emit) cfree_cg_rot3(p->cg);
- pcg_rot3_type(p);
- if (emit) cfree_cg_swap(p->cg);
- pcg_swap_type(p);
- if (emit) cfree_cg_store(p->cg, pcg_mem(p, mem_ty ? mem_ty : rv_ty));
+ /* The aux to consume lives on the lvalue slot at parser depth 1. */
+ lv = pcg_lv_aux_at(p, 1);
+ access = pcg_mem(p, mem_ty ? mem_ty : rv_ty);
+ if (lv && lv->bit_width) {
+ access.bit_offset = lv->bit_offset;
+ access.bit_width = lv->bit_width;
+ access.storage_size = lv->storage_size;
+ access.bit_signed = lv->bit_signed;
+ }
+ ea.offset = lv ? lv->offset : 0;
+ ea.scale = lv ? lv->scale : 0u;
+ if (ea.scale == 0 &&
+ !(rv_ty && (rv_ty->kind == TY_INT128 || rv_ty->kind == TY_UINT128 ||
+ rv_ty->kind == TY_LDOUBLE))) {
+ if (emit) {
+ cfree_cg_dup(p->cg);
+ cfree_cg_rot3(p->cg);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, access, ea);
+ }
+ } else if (ea.scale == 0) {
+ /* Simple lvalue: stash rv so the actual destination store does not have
+ * to keep a duplicate expression result live under register pressure. */
+ FrameSlotDesc fsd;
+ FrameSlot tmp;
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = rv_ty;
+ fsd.size = c_abi_sizeof(p->abi, rv_ty);
+ fsd.align = c_abi_alignof(p->abi, rv_ty);
+ fsd.kind = FS_LOCAL;
+ tmp = pcg_local(p, &fsd);
+ if (emit) {
+ CfreeCgMemAccess rv_access = pcg_mem(p, rv_ty);
+ CfreeCgEffAddr zero_ea;
+ zero_ea.offset = 0;
+ zero_ea.scale = 0;
+ /* [base, rv] */
+ cfree_cg_push_local(p->cg, tmp); /* [base, rv, tmp] */
+ cfree_cg_swap(p->cg); /* [base, tmp, rv] */
+ cfree_cg_store(p->cg, rv_access, zero_ea); /* [base] */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_load(p->cg, rv_access, zero_ea); /* [base, rv] */
+ cfree_cg_store(p->cg, access, ea); /* [] */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_load(p->cg, rv_access, zero_ea); /* [rv] */
+ }
+ } else {
+ /* Indexed lvalue: CG stack on entry is [base, idx, rv]. Stash rv into a
+ * compiler-temp local so we can reorder cleanly, then re-load after the
+ * store. */
+ FrameSlotDesc fsd;
+ FrameSlot tmp;
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = rv_ty;
+ fsd.size = c_abi_sizeof(p->abi, rv_ty);
+ fsd.align = c_abi_alignof(p->abi, rv_ty);
+ fsd.kind = FS_LOCAL;
+ tmp = pcg_local(p, &fsd);
+ if (emit) {
+ CfreeCgMemAccess rv_access = pcg_mem(p, rv_ty);
+ CfreeCgEffAddr zero_ea;
+ zero_ea.offset = 0;
+ zero_ea.scale = 0;
+ /* [base, idx, rv] */
+ cfree_cg_push_local(p->cg, tmp); /* [base, idx, rv, tmp] */
+ cfree_cg_swap(p->cg); /* [base, idx, tmp, rv] */
+ cfree_cg_store(p->cg, rv_access, zero_ea); /* [base, idx] */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_load(p->cg, rv_access, zero_ea); /* [base, idx, rv] */
+ cfree_cg_store(p->cg, access, ea); /* [] */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_load(p->cg, rv_access, zero_ea); /* [rv] */
+ }
+ }
pcg_drop_type(p);
pcg_drop_type(p);
+ pcg_push_type(p, rv_ty);
}
void pcg_deref(Parser* p, const Type* pointee) {
const Type* ptr_ty = pcg_top_type(p);
if (pointee && pointee->kind == TY_FUNC) {
+ /* Function lvalues collapse to function pointers in C; no CG-level
+ * dereference is needed (functions aren't first-class data). */
pcg_retag_top(p, pointee);
return;
}
@@ -419,10 +640,81 @@ void pcg_deref(Parser* p, const Type* pointee) {
if (pcg_emit_enabled(p)) cfree_cg_bitcast(p->cg, pcg_tid(p, want_ptr_ty));
pcg_retag_top(p, want_ptr_ty);
}
- if (pcg_emit_enabled(p)) cfree_cg_indirect(p->cg);
+ /* No cfree_cg_indirect: the cg load/store accept pointer-rvalue bases
+ * directly. Mark the slot as a C-language lvalue with POINTER_RV base; the
+ * pointer stays on the CG stack untouched. */
pcg_retag_top(p, pointee);
- if (p->cg_type_sp)
+ if (p->cg_type_sp) {
p->cg_value_flags[p->cg_type_sp - 1u] = pcg_lvalue_flags_for_type(pointee);
+ p->cg_lv_aux[p->cg_type_sp - 1u].base_kind = PCG_LV_BASE_POINTER_RV;
+ }
+}
+
+/* ---- Lvalue chain helpers ---- */
+
+void pcg_lv_member(Parser* p, i64 byte_offset, const Type* field_ty,
+ u16 bf_offset, u16 bf_width, u32 bf_storage_size) {
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ int was_lvalue = pcg_top_is_lvalue(p);
+ i64 saved_offset = lv ? lv->offset + byte_offset : byte_offset;
+ u32 saved_scale = lv ? lv->scale : 0u;
+ u8 saved_base_kind = lv ? lv->base_kind : PCG_LV_BASE_LOCAL;
+ /* Bumping the offset preserves the base kind and any earlier offset/scale
+ * accumulated on the chain (`a[i].f.g` keeps `scale = sizeof(elem)` and
+ * adds the field offsets). */
+ pcg_retag_top(p, field_ty);
+ if (was_lvalue) pcg_set_top_lvalue(p);
+ /* pcg_retag_top cleared aux; re-apply the bumped offset and base kind. */
+ {
+ PcgLvAux* lv_after = pcg_top_lv_aux(p);
+ if (lv_after) {
+ lv_after->offset = saved_offset;
+ lv_after->scale = saved_scale;
+ lv_after->base_kind = saved_base_kind;
+ lv_after->bit_offset = bf_offset;
+ lv_after->bit_width = bf_width;
+ lv_after->storage_size = bf_storage_size;
+ lv_after->bit_signed = pcg_type_is_signed(field_ty) ? 1u : 0u;
+ }
+ if (bf_width && p->cg_type_sp)
+ p->cg_value_flags[p->cg_type_sp - 1u] |= PCG_VALUE_BITFIELD;
+ }
+}
+
+void pcg_lv_subscript(Parser* p, u32 elem_size, const Type* elem_ty) {
+ /* Stack on entry (parser side): [base_lv, index_rv].
+ * Stack on entry (CG side): [base, index].
+ * After this call (parser): [elem_lv] with aux.scale = elem_size.
+ * After this call (CG): [base, index] — unchanged; the eventual
+ * load/store consumes both via the EA. */
+ PcgLvAux* base_lv = pcg_lv_aux_at(p, 1);
+ i64 saved_offset = base_lv ? base_lv->offset : 0;
+ u8 base_is_lvalue =
+ (p->cg_type_sp >= 2u &&
+ (p->cg_value_flags[p->cg_type_sp - 2u] & PCG_VALUE_LVALUE) != 0);
+ u8 saved_base_kind = !base_is_lvalue
+ ? PCG_LV_BASE_POINTER_RV
+ : (base_lv ? base_lv->base_kind
+ : PCG_LV_BASE_LOCAL);
+ if (base_lv && base_lv->scale != 0) {
+ perr(p, "internal: nested subscript without materialization");
+ }
+ pcg_drop_type(p); /* drop index parser slot */
+ pcg_retag_top(p, elem_ty); /* retag base parser slot as element */
+ pcg_set_top_lvalue(p);
+ {
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ if (lv) {
+ lv->offset = saved_offset;
+ lv->scale = elem_size;
+ lv->base_kind = saved_base_kind;
+ }
+ }
+}
+
+void pcg_decay_array(Parser* p, const Type* arr_ty) {
+ const Type* ptr_ty = type_ptr(p->pool, arr_ty->arr.elem);
+ pcg_materialize_lv_to_ptr(p, ptr_ty);
}
void pcg_binop(Parser* p, BinOp op) {
@@ -515,49 +807,90 @@ void pcg_convert(Parser* p, const Type* dst) {
void pcg_inc_dec(Parser* p, BinOp op, int post) {
const Type* ty = pcg_top_type(p);
- if (pcg_emit_enabled(p)) {
+ if (!pcg_emit_enabled(p)) {
+ /* Drop the lvalue parser slot and push the rvalue result type. */
+ pcg_drop_type(p);
+ pcg_push_type(p, ty);
+ return;
+ }
+ {
CfreeCgIntBinOp cg_op = pcg_int_binop(op);
- CfreeCgMemAccess mem = pcg_mem(p, ty);
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ int indexed = lv && lv->scale != 0;
+ CfreeCgMemAccess access;
+ CfreeCgEffAddr ea;
+ const Type* step_ty = ty;
+ u32 step = 1;
+ pcg_consume_ea_for_top(p, ty, &access, &ea);
if (ty && ty->kind == TY_PTR) {
const Type* pointee = ty->ptr.pointee;
- const Type* idx_ty = c_abi_ptrdiff_type(p->abi, p->pool);
- u32 step;
if (pointee && pointee->kind == TY_VOID)
perr(p, "pointer arithmetic on void pointer");
step = c_abi_sizeof(p->abi, pointee);
- cfree_cg_dup(p->cg); /* [lv, lv] */
- cfree_cg_load(p->cg, mem); /* [lv, old] */
- if (post) {
- FrameSlotDesc fsd;
- FrameSlot old_slot;
- memset(&fsd, 0, sizeof fsd);
- fsd.type = ty;
- fsd.size = c_abi_sizeof(p->abi, ty);
- fsd.align = c_abi_alignof(p->abi, ty);
- fsd.kind = FS_LOCAL;
- old_slot = pcg_local(p, &fsd);
-
- cfree_cg_dup(p->cg); /* [lv, old, old] */
- cfree_cg_push_local(p->cg, old_slot);
- cfree_cg_swap(p->cg); /* [lv, old, tmp, old] */
- cfree_cg_store(p->cg, mem); /* [lv, old] */
- cfree_cg_push_int(p->cg, step, pcg_tid(p, idx_ty));
- cfree_cg_int_binop(p->cg, cg_op, 0); /* [lv, new] */
- cfree_cg_store(p->cg, mem); /* [] */
- cfree_cg_push_local(p->cg, old_slot);
- cfree_cg_load(p->cg, mem); /* [old] */
- } else {
- cfree_cg_push_int(p->cg, step, pcg_tid(p, idx_ty));
- cfree_cg_int_binop(p->cg, cg_op, 0); /* [lv, new] */
- cfree_cg_dup(p->cg); /* [lv, new, new] */
- cfree_cg_rot3(p->cg); /* [new, new, lv] */
- cfree_cg_swap(p->cg); /* [new, lv, new] */
- cfree_cg_store(p->cg, mem); /* [new] */
+ step_ty = c_abi_ptrdiff_type(p->abi, p->pool);
+ }
+ /* Allocate a temp to stash the previous (post=1) or new (post=0) value
+ * for the expression-value. Both indexed and simple paths use the same
+ * stash so the resulting sequence is uniform. */
+ {
+ FrameSlotDesc fsd;
+ FrameSlot tmp;
+ const Type* result_ty = ty;
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = result_ty;
+ fsd.size = c_abi_sizeof(p->abi, result_ty);
+ fsd.align = c_abi_alignof(p->abi, result_ty);
+ fsd.kind = FS_LOCAL;
+ tmp = pcg_local(p, &fsd);
+ {
+ CfreeCgMemAccess r_access = pcg_mem(p, result_ty);
+ CfreeCgEffAddr zero_ea = {0, 0};
+ /* Duplicate the lvalue base (+ index if indexed) so we can load the
+ * old value AND store the new value through the same address. */
+ if (indexed) {
+ cfree_cg_dup2(p->cg);
+ } else {
+ cfree_cg_dup(p->cg);
+ }
+ cfree_cg_load(p->cg, access, ea); /* ..., lv-base[, idx], old */
+ if (post) {
+ /* Stash old, compute new, store, then re-load old as result. */
+ cfree_cg_dup(p->cg); /* ..., lv-base[, idx], old, old */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, r_access, zero_ea); /* ..., lv-base[, idx], old */
+ if (ty && ty->kind == TY_PTR) {
+ cfree_cg_push_int(p->cg, step, pcg_tid(p, step_ty));
+ } else {
+ cfree_cg_push_int(p->cg, 1, pcg_tid(p, step_ty));
+ }
+ cfree_cg_int_binop(p->cg, cg_op, 0); /* ..., lv-base[, idx], new */
+ cfree_cg_store(p->cg, access, ea); /* [] */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_load(p->cg, r_access, zero_ea); /* [old] */
+ } else {
+ /* Compute new, stash new, store, then re-load new as result. */
+ if (ty && ty->kind == TY_PTR) {
+ cfree_cg_push_int(p->cg, step, pcg_tid(p, step_ty));
+ } else {
+ cfree_cg_push_int(p->cg, 1, pcg_tid(p, step_ty));
+ }
+ cfree_cg_int_binop(p->cg, cg_op, 0); /* ..., lv-base[, idx], new */
+ cfree_cg_dup(p->cg); /* ..., lv-base[, idx], new, new */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, r_access, zero_ea); /* ..., lv-base[, idx], new */
+ cfree_cg_store(p->cg, access, ea); /* [] */
+ cfree_cg_push_local(p->cg, tmp);
+ cfree_cg_load(p->cg, r_access, zero_ea); /* [new] */
+ }
+ (void)step;
}
- } else {
- cfree_cg_inc_dec(p->cg, cg_op, post, pcg_tid(p, ty), mem);
}
}
+ /* Parser stack: drop the lvalue slot, push the result rvalue type. */
+ pcg_drop_type(p);
+ pcg_push_type(p, ty);
}
void pcg_call(Parser* p, u32 nargs, const Type* fn_type) {
diff --git a/lang/c/parse/cg_public_compat.h b/lang/c/parse/cg_public_compat.h
@@ -14,6 +14,42 @@ typedef CfreeCgLocal FrameSlot;
#define FRAME_SLOT_NONE CFREE_CG_LOCAL_NONE
#define OBJ_GROUP_NONE 0u
+/* Lvalue auxiliary state, carried parallel to cg_type_stack / cg_value_flags.
+ *
+ * The C parser tracks one logical "C-language value" per stack slot. When that
+ * slot is a C-language lvalue (PCG_VALUE_LVALUE), the aux below records the
+ * pending effective-address modifiers and bit-field metadata that the next
+ * load / store / addr will fold onto the CG memop.
+ *
+ * Lvalue chains (`s.f`, `a[i].g`, etc.) accumulate into this aux instead of
+ * emitting per-step CG ops: there is no CG-level `field` / `index` /
+ * `addr_offset` op anymore. Field offsets bump `offset`; subscripts set
+ * `scale` and leave the evaluated index value on the CG stack just above the
+ * lvalue base. Bit-field selections fill `bit_*`. The aux is consumed by the
+ * very next pcg_load / pcg_store / pcg_addr that crosses the slot.
+ *
+ * `base_kind` records what the CG-stack base under this lvalue actually is —
+ * either an OPK_LOCAL produced by push_local (PCG_LV_BASE_LOCAL) or a
+ * pointer rvalue from push_symbol_addr, push_local_addr, dereference, or
+ * pointer arithmetic (PCG_LV_BASE_POINTER_RV). Stream A's CG-side memops
+ * accept either shape uniformly; pcg_addr uses the distinction to decide
+ * whether to emit cfree_cg_addr or treat the base as already-a-pointer. */
+typedef enum PcgLvBaseKind {
+ PCG_LV_BASE_LOCAL = 0,
+ PCG_LV_BASE_POINTER_RV = 1,
+} PcgLvBaseKind;
+
+typedef struct PcgLvAux {
+ i64 offset;
+ u32 scale;
+ u16 bit_offset;
+ u16 bit_width;
+ u32 storage_size;
+ u8 bit_signed;
+ u8 base_kind; /* PcgLvBaseKind */
+ u8 pad[6];
+} PcgLvAux;
+
typedef enum BinOp {
BO_IADD,
BO_ISUB,
@@ -211,6 +247,45 @@ void pcg_load(Parser*);
void pcg_addr(Parser*);
void pcg_store(Parser*);
void pcg_deref(Parser*, const Type*);
+
+/* ---- Lvalue auxiliary access ----
+ *
+ * pcg_top_lv_aux returns a mutable pointer to TOS's lvalue aux, used by
+ * parse_postfix and the initializer / compound-assignment paths to fold
+ * field offsets and bit-field metadata inline. Returns NULL if the parser
+ * stack is empty; behavior on a non-lvalue TOS is the caller's responsibility
+ * (parse_postfix has already validated lvalueness before calling). */
+PcgLvAux* pcg_top_lv_aux(Parser*);
+PcgLvAux* pcg_lv_aux_at(Parser*, u32 depth);
+
+/* ---- Lvalue chain helpers ----
+ *
+ * Each maps directly to the canonical encodings in doc/INDIRECT.md without
+ * emitting any intermediate field / index / addr_offset CG op. Field offsets
+ * and array scales are accumulated on the TOS lvalue's aux; the next
+ * pcg_load / pcg_store / pcg_addr consumes them. */
+
+/* Fold `s.f` (or any path-resolved field selection) into the TOS lvalue.
+ * byte_offset is the cumulative offset within the record; field_ty is the field
+ * type; bf_* are bit-field metadata (bf_width == 0 for non-bitfields). The
+ * caller is responsible for verifying TOS is an lvalue of a record type. */
+void pcg_lv_member(Parser*, i64 byte_offset, const Type* field_ty,
+ u16 bf_offset, u16 bf_width, u32 bf_storage_size);
+
+/* Attach `[index]` to the TOS lvalue. PRECONDITION: the index value has just
+ * been pushed onto the CG stack (and parser stack) above the lvalue base;
+ * the parser stack therefore has [base_lv, index] at depth [1, 0]. This call
+ * records `scale = elem_size` on the base's aux, drops the index parser
+ * slot (leaving the index value on the CG stack for the eventual memop),
+ * and retags the surviving slot as elem_ty (lvalue). */
+void pcg_lv_subscript(Parser*, u32 elem_size, const Type* elem_ty);
+
+/* Decay an array lvalue at TOS into a pointer-to-element rvalue. Emits
+ * cfree_cg_addr (or a no-op for pointer-rvalue bases) and folds any pending
+ * EA modifiers into the resulting pointer via ptr arithmetic. After return,
+ * TOS is a pointer rvalue whose pointee type is `arr_ty->arr.elem`. */
+void pcg_decay_array(Parser*, const Type* arr_ty);
+
void pcg_binop(Parser*, BinOp);
void pcg_unop(Parser*, UnOp);
void pcg_cmp(Parser*, CmpOp);
diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c
@@ -1032,9 +1032,7 @@ i64 eval_const_int(Parser* p, SrcLoc loc) {
* ============================================================ */
static void decay_array_to_pointer(Parser* p, const Type* arr_ty) {
- const Type* ptr_ty = type_ptr(p->pool, arr_ty->arr.elem);
- if (pcg_emit_enabled(p)) cfree_cg_addr_offset(p->cg, 0, pcg_tid(p, ptr_ty));
- pcg_retag_top(p, ptr_ty);
+ pcg_decay_array(p, arr_ty);
}
static FrameSlot vla_size_slot_for_type(VLABound* bounds, const Type* ty) {
@@ -1046,6 +1044,7 @@ static FrameSlot vla_size_slot_for_type(VLABound* bounds, const Type* ty) {
void to_rvalue(Parser* p) {
const Type* t = cg_top_type(p->cg);
+ int is_lvalue = pcg_top_is_lvalue(p);
if (t) {
if (t->kind == TY_ARRAY) {
decay_array_to_pointer(p, t);
@@ -1056,11 +1055,21 @@ void to_rvalue(Parser* p) {
return;
}
if (t->kind == TY_STRUCT || t->kind == TY_UNION) {
- p->cg_type_stack[p->cg_type_sp - 1u] = type_unqual(p->pool, t);
+ const Type* uty = type_unqual(p->pool, t);
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ int materialize =
+ is_lvalue && lv &&
+ (lv->offset != 0 || lv->scale != 0 ||
+ lv->base_kind == PCG_LV_BASE_POINTER_RV);
+ p->cg_type_stack[p->cg_type_sp - 1u] = uty;
+ if (materialize) {
+ pcg_addr(p);
+ pcg_deref(p, uty);
+ }
return;
}
}
- cg_load(p->cg);
+ if (is_lvalue) cg_load(p->cg);
}
/* ============================================================
@@ -2103,13 +2112,33 @@ static int find_record_member_path(Parser* p, const Type* rec_ty, Sym mname,
static void cg_record_member_path(Parser* p, const Type* member_ty,
const u32* path, u32 depth,
const Field* field) {
- int was_lvalue = pcg_top_is_lvalue(p);
+ /* Walk the path locally to compute the cumulative byte offset; pull
+ * bit-field metadata from the final ABIFieldLayout when applicable. The
+ * field/index/addr_offset CG ops are gone — pcg_lv_member folds the offset
+ * (and any bit-field meta) onto the TOS lvalue's aux for the next memop. */
+ const Type* cur_ty = pcg_top_type(p);
+ i64 total_offset = 0;
+ u16 bf_off = 0;
+ u16 bf_w = 0;
+ u32 bf_ss = 0;
+ cur_ty = type_unqual(p->pool, cur_ty);
for (u32 i = 0; i < depth; ++i) {
- if (pcg_emit_enabled(p)) cfree_cg_field(p->cg, path[i]);
+ const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, cur_ty);
+ const ABIFieldLayout* fl;
+ const Field* f;
+ if (!L) break;
+ fl = &L->fields[path[i]];
+ f = &cur_ty->rec.fields[path[i]];
+ total_offset += (i64)fl->offset;
+ if (i + 1u == depth && (f->flags & FIELD_BITFIELD)) {
+ bf_off = fl->bit_offset;
+ bf_w = fl->bit_width;
+ bf_ss = fl->storage_size;
+ }
+ cur_ty = type_unqual(p->pool, f->type);
}
- pcg_retag_top(p, member_ty);
- if (was_lvalue) pcg_set_top_lvalue(p);
- if (field && (field->flags & FIELD_BITFIELD)) pcg_set_top_bitfield(p);
+ (void)field;
+ pcg_lv_member(p, total_offset, member_ty, bf_off, bf_w, bf_ss);
}
static void parse_postfix(Parser* p) {
@@ -2150,7 +2179,7 @@ static void parse_postfix(Parser* p) {
} else if (top && top->kind == TY_PTR && top->ptr.pointee &&
top->ptr.pointee->kind == TY_FUNC) {
fn_type = top->ptr.pointee;
- cg_load(p->cg);
+ if (pcg_top_is_lvalue(p)) cg_load(p->cg);
} else {
perr(p, "called object is not a function");
}
@@ -2191,7 +2220,7 @@ static void parse_postfix(Parser* p) {
if (lt0 && lt0->kind == TY_ARRAY) {
decay_array_to_pointer(p, lt0);
} else if (lt0 && lt0->kind == TY_PTR) {
- cg_load(p->cg);
+ if (pcg_top_is_lvalue(p)) cg_load(p->cg);
}
parse_expr(p);
{
@@ -2228,10 +2257,8 @@ static void parse_postfix(Parser* p) {
p->last_pushed_vla_slot = elem_vla_slot;
p->last_pushed_vla_bounds = vla_bounds;
} else {
- if (pcg_emit_enabled(p)) cfree_cg_index(p->cg, 0);
- pcg_drop_type(p);
- pcg_retag_top(p, elem);
- pcg_set_top_lvalue(p);
+ u32 elem_size = c_abi_sizeof(p->abi, elem);
+ pcg_lv_subscript(p, elem_size, elem);
}
}
}
@@ -3314,7 +3341,16 @@ void parse_assign_expr(Parser* p) {
cg_store(p->cg);
return;
}
- cg_dup(p->cg);
+ {
+ PcgLvAux* lv = pcg_top_lv_aux(p);
+ if (pcg_emit_enabled(p)) {
+ if (lv && lv->scale != 0)
+ cfree_cg_dup2(p->cg);
+ else
+ cfree_cg_dup(p->cg);
+ }
+ pcg_dup_type(p);
+ }
cg_load(p->cg);
parse_assign_expr(p);
to_rvalue(p);
diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c
@@ -159,23 +159,40 @@ static int try_init_aggregate_from_expr(Parser* p, FrameSlot slot,
* local `slot` (whose type is `arr_ty`), with element type `elem_ty`. */
void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty,
u32 offset, const Type* elem_ty) {
- const Type* elem_ptr_ty = type_ptr(p->pool, elem_ty);
cg_push_local_typed(p->cg, slot, arr_ty);
+ /* Fold the byte offset onto the local lvalue's aux; the next
+ * load/store/addr will bake it into the memop's ea.offset. The result is an
+ * lvalue of elem_ty backed by the frame slot. */
+ pcg_lv_member(p, (i64)offset, elem_ty, /*bf_off=*/0, /*bf_w=*/0, /*ss=*/0);
+}
+
+static void zero_object_bytes_at(Parser* p, FrameSlot slot, const Type* arr_ty,
+ u32 offset, const Type* ty) {
+ CfreeCgMemAccess access = pcg_mem(p, ty);
+ push_subobject_lv(p, slot, arr_ty, offset, ty);
+ pcg_addr(p);
if (pcg_emit_enabled(p)) {
- cfree_cg_addr_offset(p->cg, (i64)offset, pcg_tid(p, elem_ptr_ty));
+ cfree_cg_memset(p->cg, 0, c_abi_sizeof(p->abi, ty), access);
}
- pcg_retag_top(p, elem_ptr_ty);
- cg_deref(p->cg, elem_ty);
+ pcg_drop_type(p);
}
static void push_record_field_lv(Parser* p, FrameSlot slot, const Type* arr_ty,
u32 rec_offset, const Type* rec_ty,
u32 field_index) {
const Field* f = &rec_ty->rec.fields[field_index];
+ const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, rec_ty);
+ u32 foff = L->fields[field_index].offset;
+ u16 bf_off = 0;
+ u16 bf_w = 0;
+ u32 bf_ss = 0;
push_subobject_lv(p, slot, arr_ty, rec_offset, rec_ty);
- if (pcg_emit_enabled(p)) cfree_cg_field(p->cg, field_index);
- pcg_retag_top(p, f->type);
- if (f->flags & FIELD_BITFIELD) pcg_set_top_bitfield(p);
+ if (f->flags & FIELD_BITFIELD) {
+ bf_off = L->fields[field_index].bit_offset;
+ bf_w = L->fields[field_index].bit_width;
+ bf_ss = L->fields[field_index].storage_size;
+ }
+ pcg_lv_member(p, (i64)foff, f->type, bf_off, bf_w, bf_ss);
}
/* Emit a load+store for one scalar leaf. */
@@ -183,15 +200,14 @@ static void emit_copy_leaf(Parser* p, FrameSlot dst_slot,
const Type* dst_arr_ty, u32 dst_off,
FrameSlot src_ptr_slot, const Type* src_ptr_ty,
u32 src_off, const Type* leaf_ty) {
- const Type* leaf_ptr_ty = type_ptr(p->pool, leaf_ty);
push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty);
cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty);
cg_load(p->cg);
- if (pcg_emit_enabled(p)) {
- cfree_cg_addr_offset(p->cg, (i64)src_off, pcg_tid(p, leaf_ptr_ty));
- }
- pcg_retag_top(p, leaf_ptr_ty);
- cg_deref(p->cg, leaf_ty);
+ /* TOS is now a pointer rvalue (the loaded source pointer). Retag as a
+ * C-language lvalue with POINTER_RV base, then fold the source byte offset
+ * onto its aux. The next cg_load consumes the EA into the memop. */
+ pcg_deref(p, leaf_ty);
+ pcg_lv_member(p, (i64)src_off, leaf_ty, 0, 0, 0);
cg_load(p->cg);
cg_store(p->cg);
cg_drop(p->cg);
@@ -679,6 +695,7 @@ void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
return;
}
advance(p); /* '{' */
+ zero_object_bytes_at(p, slot, arr_ty, offset, ty);
init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1);
expect_punct(p, '}', "'}' after struct initializer");
return;
diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h
@@ -197,6 +197,7 @@ typedef struct Parser {
const Type** cg_type_stack;
u8* cg_value_flags;
+ PcgLvAux* cg_lv_aux;
u32 cg_type_sp;
u32 cg_type_cap;
diff --git a/lang/toy/asm.c b/lang/toy/asm.c
@@ -473,15 +473,16 @@ int toy_parse_typed_asm_tail(ToyParser* p, CfreeCgTypeId result_ty,
while (i > 0) {
CfreeCgField field;
uint32_t field_index;
+ uint64_t foff = 0;
--i;
field_index = record_field_indexes ? record_field_indexes[i] : i;
if (cfree_cg_type_record_field(p->c, result_ty, field_index, &field,
- NULL) != 0)
+ &foff) != 0)
goto done;
cfree_cg_push_local(p->cg, rec_slot);
- cfree_cg_field(p->cg, field_index);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_store(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)foff, 0});
}
cfree_cg_push_local(p->cg, rec_slot);
}
diff --git a/lang/toy/builtins.c b/lang/toy/builtins.c
@@ -31,20 +31,20 @@ static void toy_store_top_to_local(ToyParser* p, CfreeCgLocal local,
CfreeCgTypeId ty) {
cfree_cg_push_local(p->cg, local);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
}
static void toy_store_const_to_local(ToyParser* p, CfreeCgLocal local,
CfreeCgTypeId ty, uint64_t value) {
cfree_cg_push_local(p->cg, local);
cfree_cg_push_int(p->cg, value, ty);
- cfree_cg_store(p->cg, toy_mem_access(p, ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
}
static void toy_push_loaded_local(ToyParser* p, CfreeCgLocal local,
CfreeCgTypeId ty) {
cfree_cg_push_local(p->cg, local);
- cfree_cg_load(p->cg, toy_mem_access(p, ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
}
static void toy_emit_dynamic_memory_loop(ToyParser* p, CfreeCgLocal dst_local,
@@ -68,22 +68,24 @@ static void toy_emit_dynamic_memory_loop(ToyParser* p, CfreeCgLocal dst_local,
toy_push_loaded_local(p, dst_local, u8_ptr_ty);
toy_push_loaded_local(p, index_local, p->int_type);
- cfree_cg_index(p->cg, 0);
+ /* The destination store carries scale = sizeof(u8) = 1, so it consumes
+ * [dst_ptr, index, value], with the value pushed last (top of stack). */
if (is_memset) {
cfree_cg_push_int(p->cg, set_value, u8_ty);
} else {
toy_push_loaded_local(p, src_local, u8_ptr_ty);
toy_push_loaded_local(p, index_local, p->int_type);
- cfree_cg_index(p->cg, 0);
- cfree_cg_load(p->cg, toy_mem_access(p, u8_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, u8_ty),
+ (CfreeCgEffAddr){0, 1});
}
- cfree_cg_store(p->cg, toy_mem_access(p, u8_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, u8_ty),
+ (CfreeCgEffAddr){0, 1});
cfree_cg_push_local(p->cg, index_local);
toy_push_loaded_local(p, index_local, p->int_type);
cfree_cg_push_int(p->cg, 1, p->int_type);
cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, 0);
- cfree_cg_store(p->cg, toy_mem_access(p, p->int_type));
+ cfree_cg_store(p->cg, toy_mem_access(p, p->int_type), (CfreeCgEffAddr){0, 0});
cfree_cg_jump(p->cg, loop_label);
cfree_cg_label_place(p->cg, end_label);
@@ -552,17 +554,17 @@ CfreeCgTypeId toy_parse_builtin_call(ToyParser* p, CfreeSym name,
dst_slot = cfree_cg_local(p->cg, dst_ty, toy_slot_attrs(0));
cfree_cg_push_local(p->cg, src_slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, src_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, src_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(p->cg, dst_slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, dst_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, dst_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(p->cg, dst_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, dst_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, dst_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(p->cg, clear_mask, dst_ty);
cfree_cg_int_binop(p->cg, CFREE_CG_INT_AND, 0);
cfree_cg_push_local(p->cg, src_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, src_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, src_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(p->cg, src_mask, src_ty);
cfree_cg_int_binop(p->cg, CFREE_CG_INT_AND, 0);
if (lo > 0) {
@@ -589,28 +591,11 @@ CfreeCgTypeId toy_parse_builtin_call(ToyParser* p, CfreeSym name,
toy_error(p, p->cur.loc, "fma expects matching float operands");
return CFREE_CG_TYPE_NONE;
}
- {
- CfreeCgLocal c_slot = cfree_cg_local(p->cg, a, toy_slot_attrs(0));
- CfreeCgLocal b_slot = cfree_cg_local(p->cg, a, toy_slot_attrs(0));
- CfreeCgLocal a_slot = cfree_cg_local(p->cg, a, toy_slot_attrs(0));
- cfree_cg_push_local(p->cg, c_slot);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, a));
- cfree_cg_push_local(p->cg, b_slot);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, a));
- cfree_cg_push_local(p->cg, a_slot);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, a));
- cfree_cg_push_local(p->cg, a_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, a));
- cfree_cg_push_local(p->cg, b_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, a));
- cfree_cg_fp_binop(p->cg, CFREE_CG_FP_MUL, 0);
- cfree_cg_push_local(p->cg, c_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, a));
- cfree_cg_fp_binop(p->cg, CFREE_CG_FP_ADD, 0);
- }
+ cfree_cg_rot3(p->cg); /* [b, c, a] */
+ cfree_cg_rot3(p->cg); /* [c, a, b] */
+ cfree_cg_fp_binop(p->cg, CFREE_CG_FP_MUL, 0);
+ cfree_cg_swap(p->cg);
+ cfree_cg_fp_binop(p->cg, CFREE_CG_FP_ADD, 0);
return a;
}
@@ -980,14 +965,19 @@ CfreeCgTypeId toy_parse_generic_builtin(ToyParser* p, CfreeSym name,
fields[1].type = toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL);
rec_ty = cfree_cg_type_record(p->c, 0, fields, 2);
rec_slot = cfree_cg_local(p->cg, rec_ty, toy_slot_attrs(0));
- cfree_cg_push_local(p->cg, rec_slot);
- cfree_cg_field(p->cg, 1);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type));
- cfree_cg_push_local(p->cg, rec_slot);
- cfree_cg_field(p->cg, 0);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type));
+ {
+ uint64_t f0_off = 0, f1_off = 0;
+ cfree_cg_type_record_field(p->c, rec_ty, 0, NULL, &f0_off);
+ cfree_cg_type_record_field(p->c, rec_ty, 1, NULL, &f1_off);
+ cfree_cg_push_local(p->cg, rec_slot);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type),
+ (CfreeCgEffAddr){(int64_t)f1_off, 0});
+ cfree_cg_push_local(p->cg, rec_slot);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type),
+ (CfreeCgEffAddr){(int64_t)f0_off, 0});
+ }
cfree_cg_push_local(p->cg, rec_slot);
return rec_ty;
}
@@ -1295,14 +1285,19 @@ CfreeCgTypeId toy_parse_atomic_generic_builtin(ToyParser* p, CfreeSym name,
fields[1].type = toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL);
rec_ty = cfree_cg_type_record(p->c, 0, fields, 2);
rec_slot = cfree_cg_local(p->cg, rec_ty, toy_slot_attrs(0));
- cfree_cg_push_local(p->cg, rec_slot);
- cfree_cg_field(p->cg, 1);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type));
- cfree_cg_push_local(p->cg, rec_slot);
- cfree_cg_field(p->cg, 0);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type));
+ {
+ uint64_t f0_off = 0, f1_off = 0;
+ cfree_cg_type_record_field(p->c, rec_ty, 0, NULL, &f0_off);
+ cfree_cg_type_record_field(p->c, rec_ty, 1, NULL, &f1_off);
+ cfree_cg_push_local(p->cg, rec_slot);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type),
+ (CfreeCgEffAddr){(int64_t)f1_off, 0});
+ cfree_cg_push_local(p->cg, rec_slot);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type),
+ (CfreeCgEffAddr){(int64_t)f0_off, 0});
+ }
cfree_cg_push_local(p->cg, rec_slot);
return rec_ty;
}
diff --git a/lang/toy/expr.c b/lang/toy/expr.c
@@ -42,16 +42,16 @@ CfreeCgTypeId toy_push_named_rvalue(ToyParser* p, CfreeSym name) {
if (v) {
toy_push_var_lvalue(p, v);
if (cfree_cg_type_kind(p->c, v->type) != CFREE_CG_TYPE_RECORD)
- cfree_cg_load(p->cg, toy_mem_access(p, v->type));
+ cfree_cg_load(p->cg, toy_mem_access(p, v->type), (CfreeCgEffAddr){0, 0});
p->last_type = v->toy_type;
return v->type;
}
{
ToyGlobal* g = toy_find_global(p, name);
if (g) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
if (cfree_cg_type_kind(p->c, g->type) != CFREE_CG_TYPE_RECORD)
- cfree_cg_load(p->cg, toy_mem_access(p, g->type));
+ cfree_cg_load(p->cg, toy_mem_access(p, g->type), (CfreeCgEffAddr){0, 0});
p->last_type = g->toy_type;
return g->type;
}
@@ -458,7 +458,7 @@ CfreeCgTypeId toy_emit_var_lvalue(ToyParser* p, CfreeSym name) {
{
ToyGlobal* g = toy_find_global(p, name);
if (g) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
return g->type;
}
}
@@ -474,9 +474,11 @@ static void toy_store_tos_to_local(ToyParser* p, CfreeCgLocal local,
CfreeCgTypeId ty) {
cfree_cg_push_local(p->cg, local);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
}
+/* Consumes [slice_base, idx] where slice_base is a pointer to the slice
+ * record. Produces [elem_ptr] -- a pointer-rvalue to the element. */
CfreeCgTypeId toy_emit_slice_index_lvalue(ToyParser* p, CfreeCgTypeId slice_ty,
ToyTypeId slice_toy_type,
ToyTypeId* elem_toy_out) {
@@ -484,19 +486,25 @@ CfreeCgTypeId toy_emit_slice_index_lvalue(ToyParser* p, CfreeCgTypeId slice_ty,
CfreeCgTypeId elem_ty = toy_type_id_cg_or_none(p, elem_toy);
CfreeCgField ptr_field;
CfreeCgLocal idx_slot;
+ uint64_t ptr_field_off = 0;
if (elem_ty == CFREE_CG_TYPE_NONE ||
cfree_cg_type_kind(p->c, slice_ty) != CFREE_CG_TYPE_RECORD ||
- cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, NULL) != 0) {
+ cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field,
+ &ptr_field_off) != 0) {
toy_error(p, p->cur.loc, "cannot index non-array/non-pointer");
return CFREE_CG_TYPE_NONE;
}
+ /* Stash the index, then load the slice's ptr field, then re-push idx
+ * and compute element pointer. */
idx_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
toy_store_tos_to_local(p, idx_slot, p->int_type);
- cfree_cg_field(p->cg, 0);
- cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type));
+ cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type),
+ (CfreeCgEffAddr){(int64_t)ptr_field_off, 0});
cfree_cg_push_local(p->cg, idx_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_index(p->cg, 0);
+ cfree_cg_load(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty),
+ cfree_cg_type_ptr(p->c, elem_ty, 0));
if (elem_toy_out) *elem_toy_out = elem_toy;
return elem_ty;
}
@@ -536,39 +544,59 @@ CfreeCgTypeId toy_emit_slice_value(ToyParser* p, CfreeCgTypeId base_ty,
slice_toy = toy_type_register_slice(p, elem_ty, elem_toy);
slice_ty = toy_type_cg(p, slice_toy);
- if (slice_ty == CFREE_CG_TYPE_NONE ||
- cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, NULL) != 0) {
- toy_error(p, p->cur.loc, "failed to create slice type");
- return CFREE_CG_TYPE_NONE;
- }
+ {
+ uint64_t ptr_off = 0;
+ uint64_t len_off = 0;
+ CfreeCgField len_field;
+ (void)len_field;
+ if (slice_ty == CFREE_CG_TYPE_NONE ||
+ cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, &ptr_off) != 0 ||
+ cfree_cg_type_record_field(p->c, slice_ty, 1, &len_field, &len_off) != 0) {
+ toy_error(p, p->cur.loc, "failed to create slice type");
+ return CFREE_CG_TYPE_NONE;
+ }
- end_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
- toy_store_tos_to_local(p, end_slot, p->int_type);
- start_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
- toy_store_tos_to_local(p, start_slot, p->int_type);
+ end_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
+ toy_store_tos_to_local(p, end_slot, p->int_type);
+ start_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
+ toy_store_tos_to_local(p, start_slot, p->int_type);
+
+ result_slot = cfree_cg_local(p->cg, slice_ty, toy_slot_attrs(0));
+ /* TOS = [base] (slice lvalue/pointer or array lvalue/pointer). */
+ if (toy_type_is_slice(p, base_toy_type)) {
+ /* Replace slice base with its data pointer (a pointer-rvalue). */
+ cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type),
+ (CfreeCgEffAddr){(int64_t)ptr_off, 0});
+ } else {
+ /* Array base: TOS is a pointer-rvalue (callers always project
+ * to a pointer for array/slice bases now). Bitcast to *elem. */
+ cfree_cg_bitcast(p->cg, cfree_cg_type_ptr(p->c, elem_ty, 0));
+ }
+ /* Compute (base + start * sizeof(elem)) as a pointer to the slice's
+ * first element. */
+ cfree_cg_push_local(p->cg, start_slot);
+ cfree_cg_load(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty),
+ cfree_cg_type_ptr(p->c, elem_ty, 0));
+ /* Store the data pointer into result_slot.ptr. */
+ cfree_cg_push_local(p->cg, result_slot);
+ cfree_cg_swap(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, ptr_field.type),
+ (CfreeCgEffAddr){(int64_t)ptr_off, 0});
- result_slot = cfree_cg_local(p->cg, slice_ty, toy_slot_attrs(0));
- if (toy_type_is_slice(p, base_toy_type)) {
- cfree_cg_field(p->cg, 0);
- cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type));
+ /* len = end - start; store into result_slot.len. */
+ cfree_cg_push_local(p->cg, result_slot);
+ cfree_cg_push_local(p->cg, end_slot);
+ cfree_cg_load(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ cfree_cg_push_local(p->cg, start_slot);
+ cfree_cg_load(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_SUB, 0);
+ cfree_cg_store(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){(int64_t)len_off, 0});
}
- cfree_cg_push_local(p->cg, start_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_index(p->cg, 0);
- cfree_cg_addr(p->cg);
- cfree_cg_push_local(p->cg, result_slot);
- cfree_cg_field(p->cg, 0);
- cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, ptr_field.type));
-
- cfree_cg_push_local(p->cg, result_slot);
- cfree_cg_field(p->cg, 1);
- cfree_cg_push_local(p->cg, end_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_push_local(p->cg, start_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_int_binop(p->cg, CFREE_CG_INT_SUB, 0);
- cfree_cg_store(p->cg, toy_mem_access(p, p->int_type));
cfree_cg_push_local(p->cg, result_slot);
if (slice_toy_out) *slice_toy_out = slice_toy;
@@ -729,7 +757,12 @@ static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) {
ToyVar* v = toy_find_var(p, name);
if (v && (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY ||
cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD)) {
- toy_push_var_lvalue(p, v);
+ if (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY) {
+ /* Array slicing/indexing wants a pointer root for address math. */
+ toy_push_var_addr(p, v);
+ } else {
+ toy_push_var_lvalue(p, v);
+ }
p->last_type = v->toy_type;
return v->type;
}
@@ -738,7 +771,7 @@ static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) {
if (g &&
(cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_ARRAY ||
cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD)) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
p->last_type = g->toy_type;
return g->type;
}
@@ -755,7 +788,7 @@ static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) {
{
ToyGlobal* g = toy_find_global(p, name);
if (g && toy_type_is_slice(p, g->toy_type)) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
p->last_type = g->toy_type;
return g->type;
}
@@ -798,8 +831,8 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
}
ty = cfree_cg_type_ptr_pointee(p->c, ty);
toy_ty = toy_type_pointee(p, toy_ty);
- cfree_cg_indirect(p->cg);
- cfree_cg_load(p->cg, toy_mem_access(p, ty));
+ /* TOS is a pointer-rvalue; memop accepts it as base directly. */
+ cfree_cg_load(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
p->last_type = toy_ty != TOY_TYPE_NONE ? toy_ty : toy_type_from_cg(p, ty);
continue;
}
@@ -834,15 +867,13 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
CfreeCgTypeId pointee = cfree_cg_type_ptr_pointee(p->c, ty);
ToyTypeId source_pointee = toy_type_pointee(p, toy_ty);
if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) {
- CfreeCgLocal idx_slot =
- cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
- cfree_cg_push_local(p->cg, idx_slot);
+ /* TOS = [ptr-to-array, idx]. Cast pointer to *elem so the load
+ * can apply the array's elem-scale on the next memop. */
+ CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, pointee);
+ cfree_cg_swap(p->cg);
+ cfree_cg_bitcast(p->cg, cfree_cg_type_ptr(p->c, elem_ty, 0));
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_indirect(p->cg);
- cfree_cg_push_local(p->cg, idx_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- ty = cfree_cg_type_array_elem(p->c, pointee);
+ ty = elem_ty;
toy_ty = toy_type_array_elem(p, source_pointee);
} else {
ty = pointee;
@@ -854,15 +885,16 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
} else if (toy_type_is_slice(p, toy_ty)) {
ty = toy_emit_slice_index_lvalue(p, ty, toy_ty, &toy_ty);
if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
- cfree_cg_load(p->cg, toy_mem_access(p, ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
p->last_type = toy_ty != TOY_TYPE_NONE ? toy_ty : toy_type_from_cg(p, ty);
continue;
} else {
toy_error(p, p->cur.loc, "cannot index non-array/non-pointer");
return CFREE_CG_TYPE_NONE;
}
- cfree_cg_index(p->cg, 0);
- cfree_cg_load(p->cg, toy_mem_access(p, ty));
+ /* TOS = [base, idx]; load with element-size scale. */
+ cfree_cg_load(p->cg, toy_mem_access(p, ty),
+ (CfreeCgEffAddr){0, (uint32_t)cfree_cg_type_size(p->c, ty)});
p->last_type = toy_ty != TOY_TYPE_NONE ? toy_ty : toy_type_from_cg(p, ty);
continue;
}
@@ -872,14 +904,16 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
uint32_t i, nfields;
int found = 0;
CfreeCgField found_field;
+ uint64_t found_off = 0;
ToyNamedType* named;
ToyTypeId field_toy_type = TOY_TYPE_NONE;
if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_PTR &&
cfree_cg_type_kind(p->c, cfree_cg_type_ptr_pointee(p->c, ty)) ==
CFREE_CG_TYPE_RECORD) {
+ /* TOS is a pointer to the record; the memop accepts it directly,
+ * so no intermediate op is needed -- we just update `ty`. */
ty = cfree_cg_type_ptr_pointee(p->c, ty);
toy_ty = toy_type_pointee(p, toy_ty);
- cfree_cg_indirect(p->cg);
}
named = toy_find_named_type_by_type(p, ty);
if (p->cur.kind == TOK_NUMBER && !p->cur.is_float) {
@@ -894,14 +928,14 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
field_index = (uint32_t)p->cur.int_value;
toy_parser_advance(p);
if (cfree_cg_type_record_field(p->c, ty, field_index, &found_field,
- NULL) != 0) {
+ &found_off) != 0) {
return CFREE_CG_TYPE_NONE;
}
- cfree_cg_field(p->cg, field_index);
if (named && field_index < named->nfields)
field_toy_type = named->fields[field_index].toy_type;
ty = found_field.type;
- cfree_cg_load(p->cg, toy_mem_access(p, ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, ty),
+ (CfreeCgEffAddr){(int64_t)found_off, 0});
if (field_toy_type != TOY_TYPE_NONE) {
CfreeCgTypeId resolved = toy_type_resolved_cg(p, field_toy_type);
p->last_type = field_toy_type;
@@ -930,13 +964,14 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
memset(&found_field, 0, sizeof found_field);
for (i = 0; i < nfields; ++i) {
CfreeCgField field;
- if (cfree_cg_type_record_field(p->c, ty, i, &field, NULL) == 0 &&
+ uint64_t off = 0;
+ if (cfree_cg_type_record_field(p->c, ty, i, &field, &off) == 0 &&
field.name == field_name) {
found = 1;
found_field = field;
+ found_off = off;
if (named && i < named->nfields)
field_toy_type = named->fields[i].toy_type;
- cfree_cg_field(p->cg, i);
break;
}
}
@@ -945,7 +980,8 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) {
return CFREE_CG_TYPE_NONE;
}
ty = found_field.type;
- cfree_cg_load(p->cg, toy_mem_access(p, ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, ty),
+ (CfreeCgEffAddr){(int64_t)found_off, 0});
if (field_toy_type != TOY_TYPE_NONE) {
CfreeCgTypeId resolved = toy_type_resolved_cg(p, field_toy_type);
p->last_type = field_toy_type;
@@ -1029,44 +1065,24 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
if (p->cur.kind == TOK_LBRACKET || p->cur.kind == TOK_DOTSTAR ||
p->cur.kind == TOK_DOT) {
+ /* `&expr` chain. Maintain the invariant that TOS holds a
+ * pointer-rvalue of type *ty. The chain returns a pointer-rvalue
+ * directly (no trailing cfree_cg_addr needed). */
ToyTypeId ty_toy = TOY_TYPE_NONE;
- if (p->cur.kind == TOK_DOT) {
- ToyVar* v = toy_find_var(p, name);
- ToyGlobal* g = toy_find_global(p, name);
- if (v && cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD) {
- toy_push_var_lvalue(p, v);
- ty = v->type;
- ty_toy = v->toy_type;
- } else if (g &&
- cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
- ty = g->type;
- ty_toy = g->toy_type;
- } else {
- ty = toy_push_named_rvalue(p, name);
- ty_toy = p->last_type;
- }
- } else if (p->cur.kind == TOK_LBRACKET) {
+ {
ToyVar* v = toy_find_var(p, name);
ToyGlobal* g = toy_find_global(p, name);
- if (v && (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY ||
- cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD)) {
- toy_push_var_lvalue(p, v);
+ if (v) {
+ toy_push_var_addr(p, v);
ty = v->type;
ty_toy = v->toy_type;
- } else if (g &&
- (cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_ARRAY ||
- cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD)) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ } else if (g) {
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
ty = g->type;
ty_toy = g->toy_type;
} else {
- ty = toy_push_named_rvalue(p, name);
- ty_toy = p->last_type;
+ ty = CFREE_CG_TYPE_NONE;
}
- } else {
- ty = toy_push_named_rvalue(p, name);
- ty_toy = p->last_type;
}
if (ty == CFREE_CG_TYPE_NONE) {
toy_error(p, p->cur.loc, "undefined variable");
@@ -1086,24 +1102,38 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
}
if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_PTR) {
CfreeCgTypeId pointee = cfree_cg_type_ptr_pointee(p->c, ty);
- if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) {
+ /* Stack: [**T_chain, index]; stash index, load inner pointer, re-push index. */
+ {
CfreeCgLocal idx_slot =
cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
cfree_cg_push_local(p->cg, idx_slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_indirect(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ cfree_cg_load(p->cg, toy_mem_access(p, ty),
+ (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(p->cg, idx_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- ty = cfree_cg_type_array_elem(p->c, pointee);
+ cfree_cg_load(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ }
+ if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) {
+ CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, pointee);
+ ty = elem_ty;
ty_toy = toy_type_array_elem(p, toy_type_pointee(p, ty_toy));
+ toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty),
+ cfree_cg_type_ptr(p->c, elem_ty, 0));
} else {
ty = pointee;
ty_toy = toy_type_pointee(p, ty_toy);
+ toy_addr_index(p, cfree_cg_type_size(p->c, ty),
+ cfree_cg_type_ptr(p->c, ty, 0));
}
} else if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_ARRAY) {
- ty = cfree_cg_type_array_elem(p->c, ty);
+ CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, ty);
+ ty = elem_ty;
ty_toy = toy_type_array_elem(p, ty_toy);
+ toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty),
+ cfree_cg_type_ptr(p->c, elem_ty, 0));
} else if (toy_type_is_slice(p, ty_toy)) {
ToyTypeId elem_toy = TOY_TYPE_NONE;
ty = toy_emit_slice_index_lvalue(p, ty, ty_toy, &elem_toy);
@@ -1113,7 +1143,6 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
toy_error(p, p->cur.loc, "cannot index non-array/non-pointer");
return CFREE_CG_TYPE_NONE;
}
- cfree_cg_index(p->cg, 0);
continue;
}
if (toy_parser_match(p, TOK_DOTSTAR)) {
@@ -1121,22 +1150,27 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
toy_error(p, p->cur.loc, "cannot dereference non-pointer");
return CFREE_CG_TYPE_NONE;
}
+ /* TOS = **T; load to TOS = *T. */
+ cfree_cg_load(p->cg, toy_mem_access(p, ty),
+ (CfreeCgEffAddr){0, 0});
ty = cfree_cg_type_ptr_pointee(p->c, ty);
ty_toy = toy_type_pointee(p, ty_toy);
- cfree_cg_indirect(p->cg);
continue;
}
if (toy_parser_match(p, TOK_DOT)) {
CfreeCgField field;
uint32_t field_index = 0;
+ uint64_t foff = 0;
ToyNamedType* named;
if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_PTR &&
cfree_cg_type_kind(p->c,
cfree_cg_type_ptr_pointee(p->c, ty)) ==
CFREE_CG_TYPE_RECORD) {
+ /* TOS = **Rec; load to *Rec. */
+ cfree_cg_load(p->cg, toy_mem_access(p, ty),
+ (CfreeCgEffAddr){0, 0});
ty = cfree_cg_type_ptr_pointee(p->c, ty);
ty_toy = toy_type_pointee(p, ty_toy);
- cfree_cg_indirect(p->cg);
}
if (cfree_cg_type_kind(p->c, ty) != CFREE_CG_TYPE_RECORD) {
toy_error(p, p->cur.loc, "field access on non-record");
@@ -1153,7 +1187,7 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
field_index = (uint32_t)p->cur.int_value;
toy_parser_advance(p);
if (cfree_cg_type_record_field(p->c, ty, field_index, &field,
- NULL) != 0)
+ &foff) != 0)
return CFREE_CG_TYPE_NONE;
} else {
CfreeSym field_name;
@@ -1168,17 +1202,21 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
toy_error(p, p->cur.loc, "unknown record field");
return CFREE_CG_TYPE_NONE;
}
+ if (cfree_cg_type_record_field(p->c, ty, field_index, NULL,
+ &foff) != 0)
+ return CFREE_CG_TYPE_NONE;
}
- cfree_cg_field(p->cg, field_index);
ty = field.type;
ty_toy = (named && field_index < named->nfields)
? named->fields[field_index].toy_type
: toy_type_from_cg(p, ty);
+ toy_addr_offset(p, (int64_t)foff,
+ cfree_cg_type_ptr(p->c, ty, 0));
continue;
}
break;
}
- cfree_cg_addr(p->cg);
+ /* TOS already holds a pointer-rvalue of type *ty. */
{
CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(p->c, ty, 0);
p->last_type = toy_type_register_ptr(
@@ -1505,15 +1543,15 @@ static CfreeCgTypeId toy_parse_expr_and(ToyParser* p) {
cfree_cg_branch_false(p->cg, false_label);
cfree_cg_push_local(p->cg, result_slot);
cfree_cg_push_int(p->cg, 1, bool_ty);
- cfree_cg_store(p->cg, toy_mem_access(p, bool_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_jump(p->cg, end_label);
cfree_cg_label_place(p->cg, false_label);
cfree_cg_push_local(p->cg, result_slot);
cfree_cg_push_int(p->cg, 0, bool_ty);
- cfree_cg_store(p->cg, toy_mem_access(p, bool_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_label_place(p->cg, end_label);
cfree_cg_push_local(p->cg, result_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, bool_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0});
ty = bool_ty;
toy_note_cg_result_type(p, ty);
}
@@ -1544,15 +1582,15 @@ static CfreeCgTypeId toy_parse_expr_or(ToyParser* p) {
cfree_cg_branch_true(p->cg, true_label);
cfree_cg_push_local(p->cg, result_slot);
cfree_cg_push_int(p->cg, 0, bool_ty);
- cfree_cg_store(p->cg, toy_mem_access(p, bool_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_jump(p->cg, end_label);
cfree_cg_label_place(p->cg, true_label);
cfree_cg_push_local(p->cg, result_slot);
cfree_cg_push_int(p->cg, 1, bool_ty);
- cfree_cg_store(p->cg, toy_mem_access(p, bool_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0});
cfree_cg_label_place(p->cg, end_label);
cfree_cg_push_local(p->cg, result_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, bool_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0});
ty = bool_ty;
toy_note_cg_result_type(p, ty);
}
diff --git a/lang/toy/internal.h b/lang/toy/internal.h
@@ -403,6 +403,11 @@ ToyScope* toy_find_scope(ToyParser* p, CfreeSym name);
ToyScope* toy_find_innermost_loop_scope(ToyParser* p);
void toy_push_var_lvalue(ToyParser* p, const ToyVar* v);
void toy_push_var_addr(ToyParser* p, const ToyVar* v);
+/* Chain helpers: see comments in symbols.c. Both leave a pointer-rvalue on
+ * TOS; toy_addr_offset takes [ptr], toy_addr_index takes [ptr, index]. */
+void toy_addr_offset(ToyParser* p, int64_t offset, CfreeCgTypeId result_ptr_ty);
+void toy_addr_index(ToyParser* p, uint64_t elem_size,
+ CfreeCgTypeId result_ptr_ty);
CfreeCgSym toy_find_decl_sym(ToyParser* p, CfreeSym name);
int toy_parse_program(ToyParser* p);
diff --git a/lang/toy/parser.c b/lang/toy/parser.c
@@ -33,11 +33,12 @@ int toy_parse_block(ToyParser* p) {
return 1;
}
-static void toy_push_local_index(ToyParser* p, CfreeCgLocal slot,
- uint64_t index) {
+/* Pushes [slot, index]: the [base, index] pair an indexed memop consumes,
+ * index being a p->int_type constant. Callers pass elem size as ea.scale. */
+static void toy_push_local_indexed(ToyParser* p, CfreeCgLocal slot,
+ uint64_t index) {
cfree_cg_push_local(p->cg, slot);
cfree_cg_push_int(p->cg, index, p->int_type);
- cfree_cg_index(p->cg, 0);
}
static int toy_check_source_value(ToyParser* p, CfreeCgTypeId expected_cg,
@@ -97,14 +98,15 @@ static int toy_copy_record_lvalue_to_local(ToyParser* p, CfreeCgTypeId src_ty,
nfields = cfree_cg_type_record_nfields(p->c, dst_ty);
for (i = 0; i < nfields; ++i) {
CfreeCgField field;
- if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, NULL)) return 0;
+ uint64_t offset = 0;
+ if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, &offset)) return 0;
cfree_cg_dup(p->cg);
- cfree_cg_field(p->cg, i);
- cfree_cg_load(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_load(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)offset, 0});
cfree_cg_push_local(p->cg, dst_slot);
- cfree_cg_field(p->cg, i);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_store(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)offset, 0});
}
cfree_cg_drop(p->cg);
return 1;
@@ -123,18 +125,19 @@ static int toy_copy_record_lvalue_to_var(ToyParser* p, CfreeCgTypeId src_ty,
nfields = cfree_cg_type_record_nfields(p->c, dst_ty);
for (i = 0; i < nfields; ++i) {
CfreeCgField field;
- if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, NULL)) return 0;
+ uint64_t offset = 0;
+ if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, &offset)) return 0;
cfree_cg_dup(p->cg);
- cfree_cg_field(p->cg, i);
- cfree_cg_load(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_load(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)offset, 0});
if (dst_var) {
toy_push_var_lvalue(p, dst_var);
} else {
- cfree_cg_push_symbol_lvalue(p->cg, dst_global->sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, dst_global->sym, 0);
}
- cfree_cg_field(p->cg, i);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_store(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)offset, 0});
}
cfree_cg_drop(p->cg);
return 1;
@@ -158,7 +161,7 @@ static int toy_parse_array_initializer(ToyParser* p, CfreeCgLocal slot,
toy_error(p, p->cur.loc, "too many array elements");
return 0;
}
- toy_push_local_index(p, slot, index);
+ toy_push_local_indexed(p, slot, index);
expr_ty = toy_parse_expr(p);
if (expr_ty == CFREE_CG_TYPE_NONE) return 0;
if (!toy_check_source_value(p, elem_ty, elem_toy_type, expr_ty,
@@ -166,7 +169,8 @@ static int toy_parse_array_initializer(ToyParser* p, CfreeCgLocal slot,
return 0;
}
if (expr_ty != elem_ty) cfree_cg_bitcast(p->cg, elem_ty);
- cfree_cg_store(p->cg, toy_mem_access(p, elem_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, elem_ty),
+ (CfreeCgEffAddr){0, (uint32_t)cfree_cg_type_size(p->c, elem_ty)});
index++;
if (!toy_parser_match(p, TOK_COMMA)) break;
}
@@ -175,9 +179,10 @@ static int toy_parse_array_initializer(ToyParser* p, CfreeCgLocal slot,
return 0;
}
while (index < count) {
- toy_push_local_index(p, slot, index);
+ toy_push_local_indexed(p, slot, index);
cfree_cg_push_int(p->cg, 0, elem_ty);
- cfree_cg_store(p->cg, toy_mem_access(p, elem_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, elem_ty),
+ (CfreeCgEffAddr){0, (uint32_t)cfree_cg_type_size(p->c, elem_ty)});
index++;
}
return 1;
@@ -198,12 +203,13 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot,
for (i = 0; i < nfields; ++i) {
CfreeCgField field;
- if (cfree_cg_type_record_field(p->c, record_ty, i, &field, NULL) != 0)
+ uint64_t foff = 0;
+ if (cfree_cg_type_record_field(p->c, record_ty, i, &field, &foff) != 0)
return 0;
cfree_cg_push_local(p->cg, slot);
- cfree_cg_field(p->cg, i);
cfree_cg_push_int(p->cg, 0, field.type);
- cfree_cg_store(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_store(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)foff, 0});
}
if (positional) {
@@ -211,15 +217,15 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot,
while (p->cur.kind != TOK_RBRACE && p->cur.kind != TOK_EOF) {
CfreeCgField field;
CfreeCgTypeId expr_ty;
+ uint64_t foff = 0;
if (field_index >= nfields) {
toy_error(p, p->cur.loc, "too many tuple fields");
return 0;
}
if (cfree_cg_type_record_field(p->c, record_ty, field_index, &field,
- NULL) != 0)
+ &foff) != 0)
return 0;
cfree_cg_push_local(p->cg, slot);
- cfree_cg_field(p->cg, field_index);
expr_ty = toy_parse_expr(p);
if (expr_ty == CFREE_CG_TYPE_NONE) return 0;
{
@@ -234,7 +240,8 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot,
}
}
if (expr_ty != field.type) cfree_cg_bitcast(p->cg, field.type);
- cfree_cg_store(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_store(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)foff, 0});
field_index++;
if (!toy_parser_match(p, TOK_COMMA)) break;
}
@@ -250,6 +257,7 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot,
CfreeCgField field;
uint32_t field_index;
CfreeCgTypeId expr_ty;
+ uint64_t foff = 0;
if (p->cur.kind != TOK_IDENT) {
toy_error(p, p->cur.loc, "expected field name");
return 0;
@@ -264,8 +272,9 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot,
toy_error(p, p->cur.loc, "unknown record field");
return 0;
}
+ if (cfree_cg_type_record_field(p->c, record_ty, field_index, NULL, &foff) != 0)
+ return 0;
cfree_cg_push_local(p->cg, slot);
- cfree_cg_field(p->cg, field_index);
expr_ty = toy_parse_expr(p);
if (expr_ty == CFREE_CG_TYPE_NONE) return 0;
{
@@ -280,7 +289,8 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot,
}
}
if (expr_ty != field.type) cfree_cg_bitcast(p->cg, field.type);
- cfree_cg_store(p->cg, toy_mem_access(p, field.type));
+ cfree_cg_store(p->cg, toy_mem_access(p, field.type),
+ (CfreeCgEffAddr){(int64_t)foff, 0});
if (!toy_parser_match(p, TOK_COMMA)) break;
}
if (!toy_parser_expect(p, TOK_RBRACE)) {
@@ -362,7 +372,7 @@ static int toy_parse_value_block_body_to_local(ToyParser* p, CfreeCgLocal slot,
if (arm_ty != result_ty) cfree_cg_bitcast(p->cg, result_ty);
cfree_cg_push_local(p->cg, slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, result_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, result_ty), (CfreeCgEffAddr){0, 0});
if (!toy_parser_expect(p, TOK_RBRACE)) {
toy_error(p, p->cur.loc, "expected '}' after value block");
p->nvars = saved_nvars;
@@ -448,7 +458,7 @@ static int toy_parse_switch_initializer(ToyParser* p, CfreeCgLocal slot,
selector_slot = cfree_cg_local(p->cg, selector_ty, toy_slot_attrs(0));
cfree_cg_push_local(p->cg, selector_slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, selector_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0});
end_label = cfree_cg_label_new(p->cg);
dispatch_label = cfree_cg_label_new(p->cg);
/* Skip the arm bodies on entry; come back through the dispatch label. */
@@ -531,7 +541,7 @@ static int toy_parse_switch_initializer(ToyParser* p, CfreeCgLocal slot,
}
cfree_cg_label_place(p->cg, dispatch_label);
cfree_cg_push_local(p->cg, selector_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, selector_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0});
if (saw_default) {
sw.default_label = default_arm_label;
} else {
@@ -639,7 +649,7 @@ static int toy_parse_while_initializer_named(ToyParser* p, CfreeCgLocal slot,
p->nscopes--;
cfree_cg_push_local(p->cg, slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, result_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, result_ty), (CfreeCgEffAddr){0, 0});
return 1;
}
@@ -828,7 +838,7 @@ static int toy_parse_let_stmt(ToyParser* p) {
if (!toy_add_local_typed(p, name, ty, toy_ty, slot, is_var)) return 0;
cfree_cg_push_local(p->cg, slot);
cfree_cg_push_int(p->cg, (uint64_t)value, ty);
- cfree_cg_store(p->cg, toy_mem_access(p, ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
if (!toy_parser_expect(p, TOK_SEMI)) {
toy_error(p, p->cur.loc, "expected ';' after let");
return 0;
@@ -946,7 +956,7 @@ static int toy_parse_let_stmt(ToyParser* p) {
} else {
cfree_cg_push_local(p->cg, slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0});
}
}
if (!toy_parser_expect(p, TOK_SEMI)) {
@@ -1051,7 +1061,7 @@ static int toy_parse_switch_stmt_named(ToyParser* p, CfreeSym label_name) {
selector_slot = cfree_cg_local(p->cg, selector_ty, toy_slot_attrs(0));
cfree_cg_push_local(p->cg, selector_slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, selector_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0});
end_label = cfree_cg_label_new(p->cg);
dispatch_label = cfree_cg_label_new(p->cg);
if (!toy_parser_reserve(p, (void**)&p->scopes, &p->cap_scopes,
@@ -1116,7 +1126,7 @@ static int toy_parse_switch_stmt_named(ToyParser* p, CfreeSym label_name) {
}
cfree_cg_label_place(p->cg, dispatch_label);
cfree_cg_push_local(p->cg, selector_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, selector_ty));
+ cfree_cg_load(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0});
sw.selector_type = selector_ty;
sw.default_label = default_arm_label != CFREE_CG_LABEL_NONE
? default_arm_label
@@ -1407,7 +1417,7 @@ static int toy_parse_return_stmt(ToyParser* p) {
}
}
cfree_cg_push_local(p->cg, slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->cur_fn_ret));
+ cfree_cg_load(p->cg, toy_mem_access(p, p->cur_fn_ret), (CfreeCgEffAddr){0, 0});
cfree_cg_ret(p->cg);
if (!toy_parser_expect(p, TOK_SEMI)) {
toy_error(p, p->cur.loc, "expected ';' after return");
@@ -1504,6 +1514,10 @@ static int toy_parse_stmt(ToyParser* p) {
int root_mutable = 1;
int lhs_slice_metadata = 0;
toy_parser_advance(p);
+ /* Chain invariant: TOS holds a pointer-rvalue of type `*lhs_ty`.
+ * Intermediate field / index / dereference steps materialize the
+ * address via toy_addr_offset / toy_addr_index / explicit loads.
+ * The final store consumes the pointer with EA {0, 0}. */
{
ToyVar* v = toy_find_var(p, name);
ToyGlobal* g = toy_find_global(p, name);
@@ -1516,17 +1530,14 @@ static int toy_parse_stmt(ToyParser* p) {
lhs_toy_type = g->toy_type;
root_mutable = g->mutable;
}
- if (v && (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY ||
- cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD)) {
- toy_push_var_lvalue(p, v);
+ if (v) {
+ toy_push_var_addr(p, v);
lhs_ty = v->type;
- } else if (g &&
- (cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_ARRAY ||
- cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD)) {
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ } else if (g) {
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
lhs_ty = g->type;
} else {
- lhs_ty = toy_push_named_rvalue(p, name);
+ lhs_ty = CFREE_CG_TYPE_NONE;
}
}
if (lhs_ty == CFREE_CG_TYPE_NONE) {
@@ -1548,26 +1559,42 @@ static int toy_parse_stmt(ToyParser* p) {
return 0;
}
if (cfree_cg_type_kind(p->c, lhs_ty) == CFREE_CG_TYPE_PTR) {
+ /* TOS = **T_chain (where lhs_ty = *T_chain). Load the pointer
+ * value so the index applies to the pointee. */
CfreeCgTypeId pointee = cfree_cg_type_ptr_pointee(p->c, lhs_ty);
ToyTypeId source_pointee = toy_type_pointee(p, lhs_toy_type);
- if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) {
+ /* index is currently on top; stash so we can load the pointer. */
+ {
CfreeCgLocal idx_slot =
cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0));
cfree_cg_push_local(p->cg, idx_slot);
cfree_cg_swap(p->cg);
- cfree_cg_store(p->cg, toy_mem_access(p, p->int_type));
- cfree_cg_indirect(p->cg);
+ cfree_cg_store(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ cfree_cg_load(p->cg, toy_mem_access(p, lhs_ty),
+ (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(p->cg, idx_slot);
- cfree_cg_load(p->cg, toy_mem_access(p, p->int_type));
- lhs_ty = cfree_cg_type_array_elem(p->c, pointee);
+ cfree_cg_load(p->cg, toy_mem_access(p, p->int_type),
+ (CfreeCgEffAddr){0, 0});
+ }
+ if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) {
+ CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, pointee);
+ lhs_ty = elem_ty;
lhs_toy_type = toy_type_array_elem(p, source_pointee);
+ toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty),
+ cfree_cg_type_ptr(p->c, elem_ty, 0));
} else {
lhs_ty = pointee;
lhs_toy_type = source_pointee;
+ toy_addr_index(p, cfree_cg_type_size(p->c, lhs_ty),
+ cfree_cg_type_ptr(p->c, lhs_ty, 0));
}
} else if (cfree_cg_type_kind(p->c, lhs_ty) == CFREE_CG_TYPE_ARRAY) {
- lhs_ty = cfree_cg_type_array_elem(p->c, lhs_ty);
+ CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, lhs_ty);
+ lhs_ty = elem_ty;
lhs_toy_type = toy_type_array_elem(p, lhs_toy_type);
+ toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty),
+ cfree_cg_type_ptr(p->c, elem_ty, 0));
} else if (toy_type_is_slice(p, lhs_toy_type)) {
lhs_ty = toy_emit_slice_index_lvalue(p, lhs_ty, lhs_toy_type,
&lhs_toy_type);
@@ -1576,7 +1603,6 @@ static int toy_parse_stmt(ToyParser* p) {
toy_error(p, p->cur.loc, "cannot index non-array/non-pointer");
return 0;
}
- cfree_cg_index(p->cg, 0);
continue;
}
if (toy_parser_match(p, TOK_DOTSTAR)) {
@@ -1585,22 +1611,27 @@ static int toy_parse_stmt(ToyParser* p) {
toy_error(p, p->cur.loc, "cannot dereference non-pointer");
return 0;
}
+ /* TOS = `**T`; load to TOS = `*T`. */
+ cfree_cg_load(p->cg, toy_mem_access(p, lhs_ty),
+ (CfreeCgEffAddr){0, 0});
lhs_ty = cfree_cg_type_ptr_pointee(p->c, lhs_ty);
lhs_toy_type = toy_type_pointee(p, lhs_toy_type);
- cfree_cg_indirect(p->cg);
continue;
}
if (toy_parser_match(p, TOK_DOT)) {
CfreeCgField field;
uint32_t field_index = 0;
ToyNamedType* named;
+ uint64_t foff = 0;
if (cfree_cg_type_kind(p->c, lhs_ty) == CFREE_CG_TYPE_PTR &&
cfree_cg_type_kind(p->c,
cfree_cg_type_ptr_pointee(p->c, lhs_ty)) ==
CFREE_CG_TYPE_RECORD) {
+ /* `p.field`: load the pointer value to address the record. */
+ cfree_cg_load(p->cg, toy_mem_access(p, lhs_ty),
+ (CfreeCgEffAddr){0, 0});
lhs_ty = cfree_cg_type_ptr_pointee(p->c, lhs_ty);
lhs_toy_type = toy_type_pointee(p, lhs_toy_type);
- cfree_cg_indirect(p->cg);
}
if (cfree_cg_type_kind(p->c, lhs_ty) != CFREE_CG_TYPE_RECORD) {
toy_error(p, p->cur.loc, "field assignment on non-record");
@@ -1617,7 +1648,7 @@ static int toy_parse_stmt(ToyParser* p) {
field_index = (uint32_t)p->cur.int_value;
toy_parser_advance(p);
if (cfree_cg_type_record_field(p->c, lhs_ty, field_index, &field,
- NULL) != 0)
+ &foff) != 0)
return 0;
} else {
CfreeSym field_name;
@@ -1632,14 +1663,17 @@ static int toy_parse_stmt(ToyParser* p) {
toy_error(p, p->cur.loc, "unknown record field");
return 0;
}
+ if (cfree_cg_type_record_field(p->c, lhs_ty, field_index, NULL, &foff) != 0)
+ return 0;
}
- cfree_cg_field(p->cg, field_index);
lhs_slice_metadata = toy_type_is_slice(p, lhs_toy_type) &&
(field_index == 0 || field_index == 1);
lhs_ty = field.type;
lhs_toy_type = (named && field_index < named->nfields)
? named->fields[field_index].toy_type
: TOY_TYPE_NONE;
+ toy_addr_offset(p, (int64_t)foff,
+ cfree_cg_type_ptr(p->c, lhs_ty, 0));
continue;
}
break;
@@ -1672,7 +1706,7 @@ static int toy_parse_stmt(ToyParser* p) {
toy_error(p, p->cur.loc, "type mismatch in assignment");
return 0;
}
- cfree_cg_store(p->cg, toy_mem_access(p, lhs_ty));
+ cfree_cg_store(p->cg, toy_mem_access(p, lhs_ty), (CfreeCgEffAddr){0, 0});
}
if (!toy_parser_expect(p, TOK_SEMI)) {
toy_error(p, p->cur.loc, "expected ';' after assignment");
@@ -1712,7 +1746,7 @@ static int toy_parse_stmt(ToyParser* p) {
toy_push_var_lvalue(p, v);
cfree_cg_swap(p->cg);
if (expr_ty != v->type) cfree_cg_bitcast(p->cg, v->type);
- cfree_cg_store(p->cg, toy_mem_access(p, v->type));
+ cfree_cg_store(p->cg, toy_mem_access(p, v->type), (CfreeCgEffAddr){0, 0});
} else {
ToyGlobal* g = toy_find_global(p, name);
if (!g) {
@@ -1736,10 +1770,10 @@ static int toy_parse_stmt(ToyParser* p) {
}
return 1;
}
- cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, g->sym, 0);
cfree_cg_swap(p->cg);
if (expr_ty != g->type) cfree_cg_bitcast(p->cg, g->type);
- cfree_cg_store(p->cg, toy_mem_access(p, g->type));
+ cfree_cg_store(p->cg, toy_mem_access(p, g->type), (CfreeCgEffAddr){0, 0});
}
}
if (!toy_parser_expect(p, TOK_SEMI)) {
diff --git a/lang/toy/symbols.c b/lang/toy/symbols.c
@@ -191,9 +191,12 @@ ToyScope* toy_find_innermost_loop_scope(ToyParser* p) {
return NULL;
}
+/* Pushes `v` as the `base` operand of a canonical memop: a frame-local is
+ * pushed as its lvalue, a static-local as its symbol address (the memop
+ * accepts both shapes). Address chains start from toy_push_var_addr instead. */
void toy_push_var_lvalue(ToyParser* p, const ToyVar* v) {
if (v->is_static)
- cfree_cg_push_symbol_lvalue(p->cg, v->static_sym, 0);
+ cfree_cg_push_symbol_addr(p->cg, v->static_sym, 0);
else
cfree_cg_push_local(p->cg, v->local);
}
@@ -205,6 +208,43 @@ void toy_push_var_addr(ToyParser* p, const ToyVar* v) {
cfree_cg_push_local_addr(p->cg, v->local);
}
+/* Address-chain helpers for the chained-lvalue paths in the toy frontend.
+ *
+ * The canonical CG memops carry only a single effective address. Field and
+ * index selectors that immediately precede a load/store therefore fold into
+ * the memop's `ea` directly. When the toy parser builds a multi-step chain
+ * (e.g. `&a.f[i].g`), each intermediate step must materialize the address
+ * with explicit pointer arithmetic so the next step sees a pointer rvalue
+ * it can compose with again. These helpers do that materialization.
+ *
+ * Inputs are pointer rvalues; outputs are pointer rvalues. A chain starts by
+ * pushing the root pointer via `cfree_cg_push_local_addr` or
+ * `cfree_cg_push_symbol_addr` (or `cfree_cg_addr` after `push_local`). */
+
+/* Stack: [base_ptr] -> [base_ptr + offset (bytes)] typed `result_ptr_ty`. */
+void toy_addr_offset(ToyParser* p, int64_t offset, CfreeCgTypeId result_ptr_ty) {
+ if (offset != 0) { /* add in the integer domain, then retype */
+ cfree_cg_ptr_to_int(p->cg, p->int_type); /* NOTE(review): assumes int_type is pointer-width - confirm */
+ cfree_cg_push_int(p->cg, (uint64_t)offset, p->int_type);
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, 0);
+ cfree_cg_int_to_ptr(p->cg, result_ptr_ty);
+ } else { /* nothing to add: only retype the pointer */
+ cfree_cg_bitcast(p->cg, result_ptr_ty);
+ }
+}
+
+/* TOS: [base_ptr, index]. Trailing comments show the stack after each op.
+ * After: [base_ptr + index * elem_size] as `result_ptr_ty`. */
+void toy_addr_index(ToyParser* p, uint64_t elem_size,
+ CfreeCgTypeId result_ptr_ty) {
+ cfree_cg_push_int(p->cg, elem_size, p->int_type); /* [base_ptr, index, elem_size] */
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_MUL, 0); /* [base_ptr, index * elem_size] */
+ cfree_cg_swap(p->cg); /* [index * elem_size, base_ptr] */
+ cfree_cg_ptr_to_int(p->cg, p->int_type); /* NOTE(review): assumes int_type is pointer-width - confirm */
+ cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, 0); /* [base + index * elem_size] as an integer */
+ cfree_cg_int_to_ptr(p->cg, result_ptr_ty); /* retype the sum as `result_ptr_ty` */
+}
+
CfreeCgSym toy_find_decl_sym(ToyParser* p, CfreeSym name) {
ToyGlobal* g = toy_find_global(p, name);
if (g) return g->sym;
diff --git a/lang/wasm/cg.c b/lang/wasm/cg.c
@@ -34,6 +34,15 @@ static CfreeCgMemAccess wasm_cg_mem_type(CfreeCgTypeId ty) {
return mem;
}
+/* Returns the zero effective-address (offset 0, scale 0). scale == 0 means no
+ * index operand, so a load/store using it addresses exactly the base on TOS. */
+static CfreeCgEffAddr wasm_cg_ea0(void) {
+ CfreeCgEffAddr ea;
+ ea.offset = 0;
+ ea.scale = 0;
+ return ea;
+}
+
static void wasm_cg_push_zero(CfreeCompiler* c, CfreeCg* cg,
CfreeCgBuiltinTypes b, WasmValType vt) {
CfreeCgTypeId ty = wasm_cg_type(c, b, vt);
@@ -105,22 +114,26 @@ typedef struct WasmCgRuntime {
CfreeCgTypeId table_ty;
CfreeCgTypeId instance_ty;
CfreeCgTypeId instance_ptr_ty;
- uint32_t memory_field[64];
- uint32_t memory_data_field;
- uint32_t memory_pages_field;
- uint32_t memory_max_pages_field;
- uint32_t memory_flags_field;
- uint32_t func_import_field[64];
- uint32_t func_ref_entry_field[64];
- uint32_t global_field[64];
- uint32_t global_import_addr_field;
- uint32_t table_field[64];
- uint32_t table_entries_field[64];
- uint32_t table_entry_fn_field;
- uint32_t table_entry_typeidx_field;
- uint32_t table_entries_ptr_field;
- uint32_t table_len_field;
- uint32_t table_max_field;
+ uint64_t table_entry_size;
+ /* Byte offsets within the instance struct for each top-level slot. */
+ uint64_t memory_offset[64];
+ uint64_t func_import_offset[64];
+ uint64_t func_ref_entry_offset[64];
+ uint64_t global_offset[64];
+ uint64_t table_offset[64];
+ uint64_t table_entries_offset[64];
+ /* Byte offsets within their containing record. */
+ uint64_t memory_data_offset;
+ uint64_t memory_pages_offset;
+ uint64_t memory_max_pages_offset;
+ uint64_t memory_flags_offset;
+ uint64_t func_import_fn_offset;
+ uint64_t global_import_addr_offset;
+ uint64_t table_entries_ptr_offset;
+ uint64_t table_len_offset;
+ uint64_t table_max_offset;
+ uint64_t table_entry_fn_offset;
+ uint64_t table_entry_typeidx_offset;
CfreeCgTypeId trap_func_ty;
CfreeCgSym trap_syms[WASM_TRAP_COUNT];
} WasmCgRuntime;
@@ -192,6 +205,15 @@ static void wasm_indexed_name(char* name, size_t cap, const char* prefix,
name[pos] = '\0';
}
+static uint64_t wasm_cg_field_offset(CfreeCompiler* c, CfreeCgTypeId ty,
+ uint32_t index) { /* returns the offset of field `index` within record `ty` */
+ uint64_t off = 0;
+ CfreeStatus st = cfree_cg_type_record_field(c, ty, index, NULL, &off); /* NULL: field descriptor not needed */
+ if (st != CFREE_OK)
+ wasm_error(c, wasm_loc(0, 0), "wasm: failed to query field offset");
+ return off; /* NOTE(review): on failure this is reached only if wasm_error returns - confirm */
+}
+
static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
const WasmModule* m, WasmCgRuntime* rt) {
CfreeCgField memory_fields[4];
@@ -201,6 +223,12 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
CfreeCgField table_fields[3];
CfreeCgField instance_fields[256];
uint32_t nfields = 0;
+ uint32_t memory_field_idx[64];
+ uint32_t func_import_field_idx[64];
+ uint32_t func_ref_entry_field_idx[64];
+ uint32_t global_field_idx[64];
+ uint32_t table_field_idx[64];
+ uint32_t table_entries_field_idx[64];
memset(rt, 0, sizeof *rt);
rt->i8_ptr_ty = cfree_cg_type_ptr(c, b.id[CFREE_CG_BUILTIN_I8], 0);
rt->void_ptr_ty = rt->i8_ptr_ty;
@@ -215,20 +243,23 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
memory_fields[3].type = b.id[CFREE_CG_BUILTIN_I32];
rt->memory_ty = cfree_cg_type_record(
c, cfree_sym_intern(c, "CfreeWasmMemory"), memory_fields, 4);
- rt->memory_data_field = 0;
- rt->memory_pages_field = 1;
- rt->memory_max_pages_field = 2;
- rt->memory_flags_field = 3;
+ rt->memory_data_offset = wasm_cg_field_offset(c, rt->memory_ty, 0);
+ rt->memory_pages_offset = wasm_cg_field_offset(c, rt->memory_ty, 1);
+ rt->memory_max_pages_offset = wasm_cg_field_offset(c, rt->memory_ty, 2);
+ rt->memory_flags_offset = wasm_cg_field_offset(c, rt->memory_ty, 3);
memset(func_import_fields, 0, sizeof func_import_fields);
func_import_fields[0].name = cfree_sym_intern(c, "fn");
func_import_fields[0].type = rt->void_ptr_ty;
rt->func_import_ty = cfree_cg_type_record(
c, cfree_sym_intern(c, "CfreeWasmFuncImport"), func_import_fields, 1);
+ rt->func_import_fn_offset = wasm_cg_field_offset(c, rt->func_import_ty, 0);
memset(global_import_fields, 0, sizeof global_import_fields);
global_import_fields[0].name = cfree_sym_intern(c, "addr");
global_import_fields[0].type = rt->void_ptr_ty;
rt->global_import_ty = cfree_cg_type_record(
c, cfree_sym_intern(c, "CfreeWasmGlobalImport"), global_import_fields, 1);
+ rt->global_import_addr_offset =
+ wasm_cg_field_offset(c, rt->global_import_ty, 0);
memset(table_entry_fields, 0, sizeof table_entry_fields);
table_entry_fields[0].name = cfree_sym_intern(c, "fn");
table_entry_fields[0].type = rt->void_ptr_ty;
@@ -237,8 +268,10 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
rt->table_entry_ty = cfree_cg_type_record(
c, cfree_sym_intern(c, "CfreeWasmTableEntry"), table_entry_fields, 2);
rt->table_entry_ptr_ty = cfree_cg_type_ptr(c, rt->table_entry_ty, 0);
- rt->table_entry_fn_field = 0;
- rt->table_entry_typeidx_field = 1;
+ rt->table_entry_fn_offset = wasm_cg_field_offset(c, rt->table_entry_ty, 0);
+ rt->table_entry_typeidx_offset =
+ wasm_cg_field_offset(c, rt->table_entry_ty, 1);
+ rt->table_entry_size = cfree_cg_type_size(c, rt->table_entry_ty);
memset(table_fields, 0, sizeof table_fields);
table_fields[0].name = cfree_sym_intern(c, "entries");
table_fields[0].type = rt->table_entry_ptr_ty;
@@ -248,9 +281,9 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
table_fields[2].type = b.id[CFREE_CG_BUILTIN_I32];
rt->table_ty = cfree_cg_type_record(c, cfree_sym_intern(c, "CfreeWasmTable"),
table_fields, 3);
- rt->table_entries_ptr_field = 0;
- rt->table_len_field = 1;
- rt->table_max_field = 2;
+ rt->table_entries_ptr_offset = wasm_cg_field_offset(c, rt->table_ty, 0);
+ rt->table_len_offset = wasm_cg_field_offset(c, rt->table_ty, 1);
+ rt->table_max_offset = wasm_cg_field_offset(c, rt->table_ty, 2);
memset(instance_fields, 0, sizeof instance_fields);
for (uint32_t i = 0; i < m->nmemories; ++i) {
char name[40];
@@ -260,7 +293,7 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
memcpy(name, "memory", sizeof "memory");
else
wasm_indexed_name(name, sizeof name, "memory_", i);
- rt->memory_field[i] = nfields;
+ memory_field_idx[i] = nfields;
instance_fields[nfields].name = cfree_sym_intern(c, name);
instance_fields[nfields].type = rt->memory_ty;
nfields++;
@@ -271,7 +304,7 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
if (nfields >= 256u)
wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large");
wasm_indexed_name(name, sizeof name, "import_func_", i);
- rt->func_import_field[i] = nfields;
+ func_import_field_idx[i] = nfields;
instance_fields[nfields].name = cfree_sym_intern(c, name);
instance_fields[nfields].type = rt->func_import_ty;
nfields++;
@@ -281,7 +314,7 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
if (nfields >= 256u)
wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large");
wasm_indexed_name(name, sizeof name, "func_ref_", i);
- rt->func_ref_entry_field[i] = nfields;
+ func_ref_entry_field_idx[i] = nfields;
instance_fields[nfields].name = cfree_sym_intern(c, name);
instance_fields[nfields].type = rt->table_entry_ty;
nfields++;
@@ -293,26 +326,25 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
wasm_indexed_name(name, sizeof name,
m->globals[i].is_import ? "import_global_" : "global_",
i);
- rt->global_field[i] = nfields;
+ global_field_idx[i] = nfields;
instance_fields[nfields].name = cfree_sym_intern(c, name);
instance_fields[nfields].type =
m->globals[i].is_import ? rt->global_import_ty
: wasm_cg_type(c, b, m->globals[i].type);
nfields++;
}
- rt->global_import_addr_field = 0;
for (uint32_t i = 0; i < m->ntables; ++i) {
char name[40];
uint32_t max = m->tables[i].has_max ? m->tables[i].max : m->tables[i].min;
if (nfields + 2u > 256u)
wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large");
wasm_indexed_name(name, sizeof name, "table_", i);
- rt->table_field[i] = nfields;
+ table_field_idx[i] = nfields;
instance_fields[nfields].name = cfree_sym_intern(c, name);
instance_fields[nfields].type = rt->table_ty;
nfields++;
wasm_indexed_name(name, sizeof name, "table_entries_", i);
- rt->table_entries_field[i] = nfields;
+ table_entries_field_idx[i] = nfields;
instance_fields[nfields].name = cfree_sym_intern(c, name);
instance_fields[nfields].type =
cfree_cg_type_array(c, rt->table_entry_ty, max ? max : 1u);
@@ -321,87 +353,94 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b,
rt->instance_ty = cfree_cg_type_record(
c, cfree_sym_intern(c, "CfreeWasmInstance"), instance_fields, nfields);
rt->instance_ptr_ty = cfree_cg_type_ptr(c, rt->instance_ty, 0);
+ for (uint32_t i = 0; i < m->nmemories; ++i)
+ rt->memory_offset[i] =
+ wasm_cg_field_offset(c, rt->instance_ty, memory_field_idx[i]);
+ for (uint32_t i = 0; i < m->nfuncs; ++i) {
+ if (m->funcs[i].is_import)
+ rt->func_import_offset[i] =
+ wasm_cg_field_offset(c, rt->instance_ty, func_import_field_idx[i]);
+ rt->func_ref_entry_offset[i] =
+ wasm_cg_field_offset(c, rt->instance_ty, func_ref_entry_field_idx[i]);
+ }
+ for (uint32_t i = 0; i < m->nglobals; ++i)
+ rt->global_offset[i] =
+ wasm_cg_field_offset(c, rt->instance_ty, global_field_idx[i]);
+ for (uint32_t i = 0; i < m->ntables; ++i) {
+ rt->table_offset[i] =
+ wasm_cg_field_offset(c, rt->instance_ty, table_field_idx[i]);
+ rt->table_entries_offset[i] =
+ wasm_cg_field_offset(c, rt->instance_ty, table_entries_field_idx[i]);
+ }
}
-static void wasm_cg_push_instance_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local) {
+/* Push the instance pointer rvalue (loaded from the instance parameter slot).
+ * Stack: [] -> [instance*]. Callers fold field offsets into the memop EA. */
+static void wasm_cg_push_instance_ptr(CfreeCg* cg, const WasmCgRuntime* rt,
+ CfreeCgLocal instance_local) {
+ CfreeCgEffAddr ea = {0, 0};
cfree_cg_push_local(cg, instance_local);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty));
- cfree_cg_indirect(cg);
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty), ea);
}
-static void wasm_cg_push_memory_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t memidx) {
- wasm_cg_push_instance_lvalue(cg, rt, instance_local);
- cfree_cg_field(cg, rt->memory_field[memidx]);
+/* Add a constant byte offset to the pointer rvalue on TOS, retyping to
+ * `result_ptr_ty`. When offset == 0 no add is emitted; TOS is only bitcast. */
+static void wasm_cg_ptr_add_offset(CfreeCg* cg, CfreeCgBuiltinTypes b,
+ uint64_t offset,
+ CfreeCgTypeId result_ptr_ty) {
+ CfreeCgTypeId i64_ty = b.id[CFREE_CG_BUILTIN_I64];
+ if (offset == 0) {
+ cfree_cg_bitcast(cg, result_ptr_ty);
+ return;
+ }
+ cfree_cg_ptr_to_int(cg, i64_ty);
+ cfree_cg_push_int(cg, offset, i64_ty);
+ cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
+ cfree_cg_int_to_ptr(cg, result_ptr_ty);
}
+/* Load the i8* data pointer from instance->memories[memidx].data. */
static void wasm_cg_push_memory_data_ptr(CfreeCg* cg, const WasmCgRuntime* rt,
CfreeCgLocal instance_local,
uint32_t memidx) {
- wasm_cg_push_memory_lvalue(cg, rt, instance_local, memidx);
- cfree_cg_field(cg, rt->memory_data_field);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->i8_ptr_ty));
-}
-
-static void wasm_cg_push_memory_pages_lvalue(CfreeCg* cg,
- const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t memidx) {
- wasm_cg_push_memory_lvalue(cg, rt, instance_local, memidx);
- cfree_cg_field(cg, rt->memory_pages_field);
-}
-
-static void wasm_cg_push_memory_max_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t memidx) {
- wasm_cg_push_memory_lvalue(cg, rt, instance_local, memidx);
- cfree_cg_field(cg, rt->memory_max_pages_field);
-}
-
-static void wasm_cg_push_global_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t global_index) {
- wasm_cg_push_instance_lvalue(cg, rt, instance_local);
- cfree_cg_field(cg, rt->global_field[global_index]);
-}
-
-static void wasm_cg_push_import_func_lvalue(CfreeCg* cg,
- const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t func_index) {
- wasm_cg_push_instance_lvalue(cg, rt, instance_local);
- cfree_cg_field(cg, rt->func_import_field[func_index]);
+ CfreeCgEffAddr ea;
+ ea.offset =
+ (int64_t)(rt->memory_offset[memidx] + rt->memory_data_offset);
+ ea.scale = 0;
+ wasm_cg_push_instance_ptr(cg, rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->i8_ptr_ty), ea);
}
+/* Load instance->import_funcs[func_index].fn as a void*. */
static void wasm_cg_push_import_func_ptr(CfreeCg* cg, const WasmCgRuntime* rt,
CfreeCgLocal instance_local,
uint32_t func_index) {
- wasm_cg_push_import_func_lvalue(cg, rt, instance_local, func_index);
- cfree_cg_field(cg, 0);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty));
+ CfreeCgEffAddr ea;
+ ea.offset = (int64_t)(rt->func_import_offset[func_index] +
+ rt->func_import_fn_offset);
+ ea.scale = 0;
+ wasm_cg_push_instance_ptr(cg, rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), ea);
}
-static void wasm_cg_push_func_ref_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t func_index) {
- wasm_cg_push_instance_lvalue(cg, rt, instance_local);
- cfree_cg_field(cg, rt->func_ref_entry_field[func_index]);
-}
-
-static void wasm_cg_push_global_value_lvalue(CfreeCompiler* c, CfreeCg* cg,
- CfreeCgBuiltinTypes b,
- const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- const WasmModule* m,
- uint32_t global_index) {
- wasm_cg_push_global_lvalue(cg, rt, instance_local, global_index);
+/* Push a pointer rvalue to instance->globals[global_index]'s value cell,
+ * dereferencing the import indirection if needed. Result type is T* where T is
+ * the global's value type. */
+static void wasm_cg_push_global_value_ptr(CfreeCompiler* c, CfreeCg* cg,
+ CfreeCgBuiltinTypes b,
+ const WasmCgRuntime* rt,
+ CfreeCgLocal instance_local,
+ const WasmModule* m,
+ uint32_t global_index) {
+ CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(
+ c, wasm_cg_type(c, b, m->globals[global_index].type), 0);
if (m->globals[global_index].is_import) {
- CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(
- c, wasm_cg_type(c, b, m->globals[global_index].type), 0);
- cfree_cg_field(cg, rt->global_import_addr_field);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty));
+ CfreeCgEffAddr ea;
+ ea.offset = (int64_t)(rt->global_offset[global_index] +
+ rt->global_import_addr_offset);
+ ea.scale = 0;
+ wasm_cg_push_instance_ptr(cg, rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), ea);
cfree_cg_dup(cg);
cfree_cg_push_null(cg, rt->void_ptr_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
@@ -412,37 +451,39 @@ static void wasm_cg_push_global_value_lvalue(CfreeCompiler* c, CfreeCg* cg,
cfree_cg_label_place(cg, ok);
}
cfree_cg_bitcast(cg, ptr_ty);
- cfree_cg_indirect(cg);
+ } else {
+ wasm_cg_push_instance_ptr(cg, rt, instance_local);
+ wasm_cg_ptr_add_offset(cg, b, rt->global_offset[global_index], ptr_ty);
}
}
-static void wasm_cg_push_table_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t table_index) {
- wasm_cg_push_instance_lvalue(cg, rt, instance_local);
- cfree_cg_field(cg, rt->table_field[table_index]);
-}
-
-static void wasm_cg_push_table_entries_array_lvalue(CfreeCg* cg,
- const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t table_index) {
- wasm_cg_push_instance_lvalue(cg, rt, instance_local);
- cfree_cg_field(cg, rt->table_entries_field[table_index]);
-}
-
-static void wasm_cg_push_table_entry_lvalue(CfreeCg* cg,
- const WasmCgRuntime* rt,
- CfreeCgLocal instance_local,
- uint32_t table_index,
- CfreeCgLocal index_local,
- CfreeCgMemAccess index_mem) {
- wasm_cg_push_table_lvalue(cg, rt, instance_local, table_index);
- cfree_cg_field(cg, rt->table_entries_ptr_field);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->table_entry_ptr_ty));
+/* Push a pointer rvalue to instance->tables[table_index].entries[index_local].
+ * The index is loaded from a temp local supplied by the caller (mirrors the
+ * previous helper's signature). */
+static void wasm_cg_push_table_entry_ptr(CfreeCg* cg, CfreeCgBuiltinTypes b,
+ const WasmCgRuntime* rt,
+ CfreeCgLocal instance_local,
+ uint32_t table_index,
+ CfreeCgLocal index_local,
+ CfreeCgMemAccess index_mem) {
+ CfreeCgEffAddr ea;
+ CfreeCgEffAddr ea_idx = {0, 0};
+ CfreeCgTypeId i64_ty = b.id[CFREE_CG_BUILTIN_I64];
+ /* Load entries pointer from instance->tables[i].entries. */
+ ea.offset = (int64_t)(rt->table_offset[table_index] +
+ rt->table_entries_ptr_offset);
+ ea.scale = 0;
+ wasm_cg_push_instance_ptr(cg, rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->table_entry_ptr_ty), ea);
+ /* Compute entries + index * sizeof(entry) into a pointer rvalue. */
+ cfree_cg_ptr_to_int(cg, i64_ty);
cfree_cg_push_local(cg, index_local);
- cfree_cg_load(cg, index_mem);
- cfree_cg_index(cg, 0);
+ cfree_cg_load(cg, index_mem, ea_idx);
+ cfree_cg_zext(cg, i64_ty);
+ cfree_cg_push_int(cg, rt->table_entry_size, i64_ty);
+ cfree_cg_int_binop(cg, CFREE_CG_INT_MUL, 0);
+ cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
+ cfree_cg_int_to_ptr(cg, rt->table_entry_ptr_ty);
}
static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg,
@@ -453,6 +494,7 @@ static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg,
uint32_t width = wasm_mem_width(in->kind);
uint64_t end = in->offset64 + width;
CfreeCgLabel ok = cfree_cg_label_new(cg);
+ CfreeCgEffAddr pages_ea;
uint32_t max_pages = (uint32_t)(m->memories[in->memidx].has_max
? m->memories[in->memidx].max_pages
: m->memories[in->memidx].min_pages);
@@ -462,8 +504,11 @@ static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg,
}
(void)c;
cfree_cg_dup(cg);
- wasm_cg_push_memory_pages_lvalue(cg, rt, instance_local, in->memidx);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ pages_ea.offset =
+ (int64_t)(rt->memory_offset[in->memidx] + rt->memory_pages_offset);
+ pages_ea.scale = 0;
+ wasm_cg_push_instance_ptr(cg, rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), pages_ea);
cfree_cg_push_int(cg, 65536u, b.id[CFREE_CG_BUILTIN_I64]);
cfree_cg_int_binop(cg, CFREE_CG_INT_MUL, 0);
cfree_cg_push_int(cg, end, b.id[CFREE_CG_BUILTIN_I64]);
@@ -479,12 +524,25 @@ static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg,
cfree_cg_label_place(cg, ok);
}
-static void wasm_cg_memory_lvalue(CfreeCg* cg, const WasmCgRuntime* rt,
- CfreeCgLocal instance_local, uint32_t memidx,
- uint64_t offset) {
+/* Compute the absolute address (data_ptr + addr_on_tos + offset) as a
+ * pointer rvalue. Stack: [addr] -> [i8*]. */
+static void wasm_cg_memory_addr_from_tos(CfreeCg* cg, CfreeCgBuiltinTypes b,
+ const WasmCgRuntime* rt,
+ const WasmModule* m,
+ CfreeCgLocal instance_local,
+ uint32_t memidx, uint64_t offset) {
+ CfreeCgTypeId i64_ty = b.id[CFREE_CG_BUILTIN_I64];
+ /* TOS: [addr]. */
+ if (!m->memories[memidx].is64) cfree_cg_zext(cg, i64_ty);
+ if (offset) {
+ cfree_cg_push_int(cg, offset, i64_ty);
+ cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
+ }
+ /* TOS: [addr + offset]. */
wasm_cg_push_memory_data_ptr(cg, rt, instance_local, memidx);
- cfree_cg_swap(cg);
- cfree_cg_index(cg, offset);
+ cfree_cg_ptr_to_int(cg, i64_ty);
+ cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
+ cfree_cg_int_to_ptr(cg, rt->i8_ptr_ty);
}
static void wasm_cg_rotate(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b,
@@ -500,24 +558,24 @@ static void wasm_cg_rotate(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b,
lhs = cfree_cg_local(cg, ty, attrs);
cfree_cg_push_local(cg, rhs);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, lhs);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, lhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, rhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_int(cg, mask, ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0);
cfree_cg_int_binop(cg, right ? CFREE_CG_INT_LSHR : CFREE_CG_INT_SHL, 0);
cfree_cg_push_local(cg, lhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_int(cg, 0, ty);
cfree_cg_push_local(cg, rhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_int_binop(cg, CFREE_CG_INT_SUB, 0);
cfree_cg_push_int(cg, mask, ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0);
@@ -540,12 +598,12 @@ static void wasm_cg_checked_divrem(CfreeCompiler* c, CfreeCg* cg,
lhs = cfree_cg_local(cg, ty, attrs);
cfree_cg_push_local(cg, rhs);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, lhs);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, rhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_int(cg, 0, ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
cfree_cg_branch_true(cg, ok);
@@ -556,12 +614,12 @@ static void wasm_cg_checked_divrem(CfreeCompiler* c, CfreeCg* cg,
uint64_t min_val = vt == WASM_VAL_I32 ? UINT64_C(0x80000000)
: UINT64_C(0x8000000000000000);
cfree_cg_push_local(cg, lhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_int(cg, min_val, ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
cfree_cg_branch_true(cg, no_overflow);
cfree_cg_push_local(cg, rhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_int(cg, UINT64_MAX, ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
cfree_cg_branch_true(cg, no_overflow);
@@ -569,9 +627,9 @@ static void wasm_cg_checked_divrem(CfreeCompiler* c, CfreeCg* cg,
cfree_cg_label_place(cg, no_overflow);
}
cfree_cg_push_local(cg, lhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, rhs);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_int_binop(cg, op, 0);
}
@@ -601,25 +659,25 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg,
abs_bits = cfree_cg_local(cg, bit_ty, attrs);
cfree_cg_push_local(cg, value);
cfree_cg_swap(cg);
- cfree_cg_store(cg, src_mem);
+ cfree_cg_store(cg, src_mem, wasm_cg_ea0());
cfree_cg_push_local(cg, bits);
cfree_cg_push_local(cg, value);
- cfree_cg_load(cg, src_mem);
+ cfree_cg_load(cg, src_mem, wasm_cg_ea0());
cfree_cg_bitcast(cg, bit_ty);
- cfree_cg_store(cg, bit_mem);
+ cfree_cg_store(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_local(cg, abs_bits);
cfree_cg_push_local(cg, bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, abs_mask, bit_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0);
- cfree_cg_store(cg, bit_mem);
+ cfree_cg_store(cg, bit_mem, wasm_cg_ea0());
{
CfreeCgLabel finite = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, abs_bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, inf_bits, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_LE_U);
cfree_cg_branch_true(cg, finite);
@@ -644,12 +702,12 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg,
if (is_unsigned) {
CfreeCgLabel nonnegative = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, abs_bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, 0, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ);
cfree_cg_branch_true(cg, nonnegative);
cfree_cg_push_local(cg, bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, sign_mask, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U);
cfree_cg_branch_true(cg, nonnegative);
@@ -659,7 +717,7 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg,
{
CfreeCgLabel in_range = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, abs_bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, limit_bits, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U);
cfree_cg_branch_true(cg, in_range);
@@ -670,19 +728,19 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg,
CfreeCgLabel negative = cfree_cg_label_new(cg);
CfreeCgLabel in_range = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, sign_mask, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_GE_U);
cfree_cg_branch_true(cg, negative);
cfree_cg_push_local(cg, abs_bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, limit_bits, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U);
cfree_cg_branch_true(cg, in_range);
wasm_cg_trap_invalid_conversion(cg, rt);
cfree_cg_label_place(cg, negative);
cfree_cg_push_local(cg, abs_bits);
- cfree_cg_load(cg, bit_mem);
+ cfree_cg_load(cg, bit_mem, wasm_cg_ea0());
cfree_cg_push_int(cg, limit_bits, bit_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_LE_U);
cfree_cg_branch_true(cg, in_range);
@@ -691,7 +749,7 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg,
}
cfree_cg_push_local(cg, value);
- cfree_cg_load(cg, src_mem);
+ cfree_cg_load(cg, src_mem, wasm_cg_ea0());
if (is_unsigned)
cfree_cg_float_to_uint(cg, dst_ty, CFREE_CG_ROUND_TOWARD_ZERO);
else
@@ -771,30 +829,30 @@ static void wasm_cg_call_func(CfreeCompiler* c, CfreeCg* cg,
cfree_cg_local(cg, wasm_cg_type(c, b, f->params[param]), attrs);
cfree_cg_push_local(cg, args[param]);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, f->params[param]));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, f->params[param]), wasm_cg_ea0());
}
if (f->is_import) {
CfreeCgLabel ok = cfree_cg_label_new(cg);
callee = cfree_cg_local(cg, rt->void_ptr_ty, attrs);
cfree_cg_push_local(cg, callee);
wasm_cg_push_import_func_ptr(cg, rt, instance_local, func_index);
- cfree_cg_store(cg, wasm_cg_mem_type(rt->void_ptr_ty));
+ cfree_cg_store(cg, wasm_cg_mem_type(rt->void_ptr_ty), wasm_cg_ea0());
cfree_cg_push_local(cg, callee);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), wasm_cg_ea0());
cfree_cg_push_null(cg, rt->void_ptr_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
cfree_cg_branch_true(cg, ok);
wasm_cg_trap_table(cg, rt);
cfree_cg_label_place(cg, ok);
cfree_cg_push_local(cg, callee);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), wasm_cg_ea0());
cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, func_type, 0));
}
cfree_cg_push_local(cg, instance_local);
- cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty), wasm_cg_ea0());
for (uint32_t p = 0; p < f->nparams; ++p) {
cfree_cg_push_local(cg, args[p]);
- cfree_cg_load(cg, wasm_cg_mem(c, b, f->params[p]));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, f->params[p]), wasm_cg_ea0());
}
if (f->is_import)
cfree_cg_call(
@@ -944,16 +1002,20 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
const WasmMemory* mem = &m->memories[i];
uint64_t max_pages = mem->has_max ? mem->max_pages : mem->min_pages;
uint32_t flags = (mem->shared ? 1u : 0u) | (mem->is64 ? 2u : 0u);
- wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, i);
+ CfreeCgEffAddr ea;
+ ea.scale = 0;
+ ea.offset = (int64_t)(rt.memory_offset[i] + rt.memory_pages_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_int(cg, mem->min_pages, b.id[CFREE_CG_BUILTIN_I64]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
- wasm_cg_push_memory_max_lvalue(cg, &rt, instance_local, i);
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea);
+ ea.offset = (int64_t)(rt.memory_offset[i] + rt.memory_max_pages_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_int(cg, max_pages, b.id[CFREE_CG_BUILTIN_I64]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
- wasm_cg_push_memory_lvalue(cg, &rt, instance_local, i);
- cfree_cg_field(cg, rt.memory_flags_field);
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea);
+ ea.offset = (int64_t)(rt.memory_offset[i] + rt.memory_flags_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_int(cg, flags, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea);
if (mem->data_init_len) {
CfreeCgSym data_sym = cfree_cg_const_data(
cg, mem->data, mem->data_init_len, 16, b.id[CFREE_CG_BUILTIN_I8]);
@@ -964,38 +1026,45 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
}
}
for (i = 0; i < m->nfuncs; ++i) {
- wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, i);
- cfree_cg_field(cg, rt.table_entry_fn_field);
+ CfreeCgEffAddr ea;
+ ea.scale = 0;
+ ea.offset =
+ (int64_t)(rt.func_ref_entry_offset[i] + rt.table_entry_fn_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
if (m->funcs[i].is_import) {
wasm_cg_push_import_func_ptr(cg, &rt, instance_local, i);
} else {
cfree_cg_push_symbol_addr(cg, syms[i], 0);
cfree_cg_bitcast(cg, rt.void_ptr_ty);
}
- cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty));
- wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, i);
- cfree_cg_field(cg, rt.table_entry_typeidx_field);
+ cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty), ea);
+ ea.offset = (int64_t)(rt.func_ref_entry_offset[i] +
+ rt.table_entry_typeidx_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_int(cg, m->funcs[i].typeidx, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea);
}
for (i = 0; i < m->ntables; ++i) {
const WasmTable* t = &m->tables[i];
uint32_t max = t->has_max ? t->max : t->min;
- wasm_cg_push_table_lvalue(cg, &rt, instance_local, i);
- cfree_cg_field(cg, rt.table_entries_ptr_field);
- wasm_cg_push_table_entries_array_lvalue(cg, &rt, instance_local, i);
- cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_index(cg, 0);
- cfree_cg_addr(cg);
- cfree_cg_store(cg, wasm_cg_mem_type(rt.table_entry_ptr_ty));
- wasm_cg_push_table_lvalue(cg, &rt, instance_local, i);
- cfree_cg_field(cg, rt.table_len_field);
+ CfreeCgEffAddr ea;
+ ea.scale = 0;
+ /* tables[i].entries = &instance->table_entries_arr[i][0]. The address
+ * of the entries array is instance + table_entries_offset[i]. */
+ ea.offset = (int64_t)(rt.table_offset[i] + rt.table_entries_ptr_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ wasm_cg_ptr_add_offset(cg, b, rt.table_entries_offset[i],
+ rt.table_entry_ptr_ty);
+ cfree_cg_store(cg, wasm_cg_mem_type(rt.table_entry_ptr_ty), ea);
+ ea.offset = (int64_t)(rt.table_offset[i] + rt.table_len_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_int(cg, t->min, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
- wasm_cg_push_table_lvalue(cg, &rt, instance_local, i);
- cfree_cg_field(cg, rt.table_max_field);
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea);
+ ea.offset = (int64_t)(rt.table_offset[i] + rt.table_max_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_int(cg, max, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea);
}
for (i = 0; i < m->nelems; ++i) {
const WasmElemSegment* seg = &m->elems[i];
@@ -1009,37 +1078,48 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
slot_local = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], tmp_attrs);
cfree_cg_push_local(cg, slot_local);
cfree_cg_push_int(cg, slot, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
- wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, seg->tableidx,
- slot_local,
- wasm_cg_mem(c, b, WASM_VAL_I32));
- cfree_cg_field(cg, rt.table_entry_fn_field);
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0());
+ wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, seg->tableidx,
+ slot_local,
+ wasm_cg_mem(c, b, WASM_VAL_I32));
if (m->funcs[funcidx].is_import) {
wasm_cg_push_import_func_ptr(cg, &rt, instance_local, funcidx);
} else {
cfree_cg_push_symbol_addr(cg, syms[funcidx], 0);
cfree_cg_bitcast(cg, rt.void_ptr_ty);
}
- cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty));
- wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, seg->tableidx,
- slot_local,
- wasm_cg_mem(c, b, WASM_VAL_I32));
- cfree_cg_field(cg, rt.table_entry_typeidx_field);
+ {
+ CfreeCgEffAddr ea;
+ ea.scale = 0;
+ ea.offset = (int64_t)rt.table_entry_fn_offset;
+ cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty), ea);
+ }
+ wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, seg->tableidx,
+ slot_local,
+ wasm_cg_mem(c, b, WASM_VAL_I32));
cfree_cg_push_int(cg, m->funcs[funcidx].typeidx,
b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ {
+ CfreeCgEffAddr ea;
+ ea.scale = 0;
+ ea.offset = (int64_t)rt.table_entry_typeidx_offset;
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea);
+ }
}
}
for (i = 0; i < m->nglobals; ++i) {
const WasmGlobal* g = &m->globals[i];
+ CfreeCgEffAddr ea;
if (g->is_import) continue;
- wasm_cg_push_global_lvalue(cg, &rt, instance_local, i);
+ ea.scale = 0;
+ ea.offset = (int64_t)rt.global_offset[i];
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
if (g->type == WASM_VAL_F32 || g->type == WASM_VAL_F64)
cfree_cg_push_float(cg, g->init.fp, wasm_cg_type(c, b, g->type));
else
cfree_cg_push_int(cg, (uint64_t)g->init.imm,
wasm_cg_type(c, b, g->type));
- cfree_cg_store(cg, wasm_cg_mem(c, b, g->type));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, g->type), ea);
}
if (m->has_start)
wasm_cg_call_func(c, cg, b, &m->funcs[m->start_func], &rt,
@@ -1080,7 +1160,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_local(cg, wasm_cg_type(c, b, f->locals[j]), attrs);
cfree_cg_push_local(cg, locals[f->nparams + j]);
wasm_cg_push_zero(c, cg, b, f->locals[j]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, f->locals[j]));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, f->locals[j]), wasm_cg_ea0());
}
for (j = 0; j < f->ninsns; ++j) {
WasmInsn in = f->insns[j];
@@ -1207,30 +1287,30 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_push_local(cg, cond);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0());
cfree_cg_push_local(cg, rhs);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, lhs);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, cond);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0());
cfree_cg_branch_false(cg, else_label);
cfree_cg_push_local(cg, result);
cfree_cg_push_local(cg, lhs);
- cfree_cg_load(cg, mem);
- cfree_cg_store(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_jump(cg, end_label);
cfree_cg_label_place(cg, else_label);
cfree_cg_push_local(cg, result);
cfree_cg_push_local(cg, rhs);
- cfree_cg_load(cg, mem);
- cfree_cg_store(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_label_place(cg, end_label);
cfree_cg_push_local(cg, result);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
} break;
case WASM_INSN_I32_CONST:
cfree_cg_push_int(cg, (uint64_t)(uint32_t)in.imm,
@@ -1248,14 +1328,14 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
case WASM_INSN_LOCAL_GET: {
uint32_t index = (uint32_t)in.imm;
cfree_cg_push_local(cg, locals[index]);
- cfree_cg_load(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)), wasm_cg_ea0());
break;
}
case WASM_INSN_LOCAL_SET: {
uint32_t index = (uint32_t)in.imm;
cfree_cg_push_local(cg, locals[index]);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)), wasm_cg_ea0());
break;
}
case WASM_INSN_LOCAL_TEE: {
@@ -1263,7 +1343,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_dup(cg);
cfree_cg_push_local(cg, locals[index]);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)), wasm_cg_ea0());
break;
}
case WASM_INSN_CALL:
@@ -1304,22 +1384,27 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_local(cg, wasm_cg_type(c, b, t->results[0]), attrs);
cfree_cg_push_local(cg, selector);
cfree_cg_swap(cg);
- cfree_cg_store(cg, i32_mem);
+ cfree_cg_store(cg, i32_mem, wasm_cg_ea0());
for (uint32_t p = 0; p < t->nparams; ++p) {
uint32_t param = t->nparams - 1u - p;
args[param] =
cfree_cg_local(cg, wasm_cg_type(c, b, t->params[param]), attrs);
cfree_cg_push_local(cg, args[param]);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param]));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param]), wasm_cg_ea0());
}
ok = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, selector);
- cfree_cg_load(cg, i32_mem);
- wasm_cg_push_table_lvalue(cg, &rt, instance_local, in.align);
- cfree_cg_field(cg, rt.table_len_field);
- cfree_cg_load(cg, i32_mem);
+ cfree_cg_load(cg, i32_mem, wasm_cg_ea0());
+ {
+ CfreeCgEffAddr ea_len;
+ ea_len.scale = 0;
+ ea_len.offset =
+ (int64_t)(rt.table_offset[in.align] + rt.table_len_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ cfree_cg_load(cg, i32_mem, ea_len);
+ }
cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U);
cfree_cg_branch_true(cg, ok);
wasm_cg_trap_table(cg, &rt);
@@ -1327,13 +1412,17 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
ok = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, callee);
- wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, in.align,
- selector, i32_mem);
- cfree_cg_field(cg, rt.table_entry_fn_field);
- cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty));
- cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty));
+ wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, in.align,
+ selector, i32_mem);
+ {
+ CfreeCgEffAddr ea_fn;
+ ea_fn.scale = 0;
+ ea_fn.offset = (int64_t)rt.table_entry_fn_offset;
+ cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty), ea_fn);
+ }
+ cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty), wasm_cg_ea0());
cfree_cg_push_local(cg, callee);
- cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty), wasm_cg_ea0());
cfree_cg_push_null(cg, rt.void_ptr_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
cfree_cg_branch_true(cg, ok);
@@ -1341,10 +1430,14 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_label_place(cg, ok);
ok = cfree_cg_label_new(cg);
- wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, in.align,
- selector, i32_mem);
- cfree_cg_field(cg, rt.table_entry_typeidx_field);
- cfree_cg_load(cg, i32_mem);
+ wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, in.align,
+ selector, i32_mem);
+ {
+ CfreeCgEffAddr ea_ti;
+ ea_ti.scale = 0;
+ ea_ti.offset = (int64_t)rt.table_entry_typeidx_offset;
+ cfree_cg_load(cg, i32_mem, ea_ti);
+ }
cfree_cg_push_int(cg, (uint32_t)in.imm, b.id[CFREE_CG_BUILTIN_I32]);
cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ);
cfree_cg_branch_true(cg, ok);
@@ -1352,13 +1445,13 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_label_place(cg, ok);
cfree_cg_push_local(cg, callee);
- cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty), wasm_cg_ea0());
cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, indirect_func_type, 0));
cfree_cg_push_local(cg, instance_local);
- cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty), wasm_cg_ea0());
for (uint32_t p = 0; p < t->nparams; ++p) {
cfree_cg_push_local(cg, args[p]);
- cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p]));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p]), wasm_cg_ea0());
}
cfree_cg_call(cg, t->nparams + 1u, indirect_func_type,
(CfreeCgCallAttrs){
@@ -1371,9 +1464,9 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
} else if (t->nresults) {
cfree_cg_push_local(cg, result);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0]));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0());
cfree_cg_push_local(cg, result);
- cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0]));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0());
}
break;
}
@@ -1381,10 +1474,10 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_push_null(cg, rt.void_ptr_ty);
break;
case WASM_INSN_REF_FUNC:
- wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local,
- (uint32_t)in.imm);
- cfree_cg_addr(cg);
- cfree_cg_bitcast(cg, rt.void_ptr_ty);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ wasm_cg_ptr_add_offset(cg, b,
+ rt.func_ref_entry_offset[(uint32_t)in.imm],
+ rt.void_ptr_ty);
break;
case WASM_INSN_REF_IS_NULL:
cfree_cg_push_null(cg, rt.void_ptr_ty);
@@ -1410,18 +1503,18 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
callee = cfree_cg_local(cg, rt.void_ptr_ty, attrs);
cfree_cg_push_local(cg, callee_ref);
cfree_cg_swap(cg);
- cfree_cg_store(cg, ref_mem);
+ cfree_cg_store(cg, ref_mem, wasm_cg_ea0());
for (uint32_t p = 0; p < t->nparams; ++p) {
uint32_t param = t->nparams - 1u - p;
args[param] =
cfree_cg_local(cg, wasm_cg_type(c, b, t->params[param]), attrs);
cfree_cg_push_local(cg, args[param]);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param]));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param]), wasm_cg_ea0());
}
ok = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, callee_ref);
- cfree_cg_load(cg, ref_mem);
+ cfree_cg_load(cg, ref_mem, wasm_cg_ea0());
cfree_cg_push_null(cg, rt.void_ptr_ty);
cfree_cg_int_cmp(cg, CFREE_CG_INT_NE);
cfree_cg_branch_true(cg, ok);
@@ -1430,11 +1523,14 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
ok = cfree_cg_label_new(cg);
cfree_cg_push_local(cg, callee_ref);
- cfree_cg_load(cg, ref_mem);
+ cfree_cg_load(cg, ref_mem, wasm_cg_ea0());
cfree_cg_bitcast(cg, rt.table_entry_ptr_ty);
- cfree_cg_indirect(cg);
- cfree_cg_field(cg, rt.table_entry_typeidx_field);
- cfree_cg_load(cg, i32_mem);
+ {
+ CfreeCgEffAddr ea_ti;
+ ea_ti.scale = 0;
+ ea_ti.offset = (int64_t)rt.table_entry_typeidx_offset;
+ cfree_cg_load(cg, i32_mem, ea_ti);
+ }
cfree_cg_push_int(cg, (uint32_t)in.imm, b.id[CFREE_CG_BUILTIN_I32]);
cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ);
cfree_cg_branch_true(cg, ok);
@@ -1443,12 +1539,15 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
cfree_cg_push_local(cg, callee);
cfree_cg_push_local(cg, callee_ref);
- cfree_cg_load(cg, ref_mem);
+ cfree_cg_load(cg, ref_mem, wasm_cg_ea0());
cfree_cg_bitcast(cg, rt.table_entry_ptr_ty);
- cfree_cg_indirect(cg);
- cfree_cg_field(cg, rt.table_entry_fn_field);
- cfree_cg_load(cg, ref_mem);
- cfree_cg_store(cg, ref_mem);
+ {
+ CfreeCgEffAddr ea_fn;
+ ea_fn.scale = 0;
+ ea_fn.offset = (int64_t)rt.table_entry_fn_offset;
+ cfree_cg_load(cg, ref_mem, ea_fn);
+ }
+ cfree_cg_store(cg, ref_mem, wasm_cg_ea0());
memset(ref_params, 0, sizeof ref_params);
ref_params[0].type = rt.instance_ptr_ty;
@@ -1465,13 +1564,13 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
result =
cfree_cg_local(cg, wasm_cg_type(c, b, t->results[0]), attrs);
cfree_cg_push_local(cg, callee);
- cfree_cg_load(cg, ref_mem);
+ cfree_cg_load(cg, ref_mem, wasm_cg_ea0());
cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, ref_func_type, 0));
cfree_cg_push_local(cg, instance_local);
- cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty));
+ cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty), wasm_cg_ea0());
for (uint32_t p = 0; p < t->nparams; ++p) {
cfree_cg_push_local(cg, args[p]);
- cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p]));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p]), wasm_cg_ea0());
}
cfree_cg_call(
cg, t->nparams + 1u, ref_func_type,
@@ -1484,25 +1583,27 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
} else if (t->nresults) {
cfree_cg_push_local(cg, result);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0]));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0());
cfree_cg_push_local(cg, result);
- cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0]));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0());
}
break;
}
case WASM_INSN_GLOBAL_GET: {
uint32_t index = (uint32_t)in.imm;
- wasm_cg_push_global_value_lvalue(c, cg, b, &rt, instance_local, m,
- index);
- cfree_cg_load(cg, wasm_cg_mem(c, b, m->globals[index].type));
+ wasm_cg_push_global_value_ptr(c, cg, b, &rt, instance_local, m,
+ index);
+ cfree_cg_load(cg, wasm_cg_mem(c, b, m->globals[index].type),
+ wasm_cg_ea0());
break;
}
case WASM_INSN_GLOBAL_SET: {
uint32_t index = (uint32_t)in.imm;
- wasm_cg_push_global_value_lvalue(c, cg, b, &rt, instance_local, m,
- index);
+ wasm_cg_push_global_value_ptr(c, cg, b, &rt, instance_local, m,
+ index);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, m->globals[index].type));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, m->globals[index].type),
+ wasm_cg_ea0());
break;
}
case WASM_INSN_RETURN:
@@ -1514,12 +1615,17 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
case WASM_INSN_DROP:
cfree_cg_drop(cg);
break;
- case WASM_INSN_MEMORY_SIZE:
- wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, in.memidx);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ case WASM_INSN_MEMORY_SIZE: {
+ CfreeCgEffAddr ea_pages;
+ ea_pages.scale = 0;
+ ea_pages.offset =
+ (int64_t)(rt.memory_offset[in.memidx] + rt.memory_pages_offset);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_pages);
if (!m->memories[in.memidx].is64)
cfree_cg_trunc(cg, b.id[CFREE_CG_BUILTIN_I32]);
break;
+ }
case WASM_INSN_MEMORY_GROW: {
CfreeCgLocalAttrs attrs;
CfreeCgLocal delta, old_pages, grow_result;
@@ -1528,6 +1634,13 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
WasmValType page_vt =
m->memories[in.memidx].is64 ? WASM_VAL_I64 : WASM_VAL_I32;
CfreeCgTypeId page_ty = wasm_cg_type(c, b, page_vt);
+ CfreeCgEffAddr ea_pages, ea_max;
+ ea_pages.scale = 0;
+ ea_pages.offset =
+ (int64_t)(rt.memory_offset[in.memidx] + rt.memory_pages_offset);
+ ea_max.scale = 0;
+ ea_max.offset = (int64_t)(rt.memory_offset[in.memidx] +
+ rt.memory_max_pages_offset);
memset(&attrs, 0, sizeof attrs);
attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP;
delta = cfree_cg_local(cg, page_ty, attrs);
@@ -1535,47 +1648,47 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
grow_result = cfree_cg_local(cg, page_ty, attrs);
cfree_cg_push_local(cg, delta);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0());
cfree_cg_push_local(cg, old_pages);
- wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, in.memidx);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_pages);
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0());
cfree_cg_push_local(cg, delta);
- cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0());
if (!m->memories[in.memidx].is64)
cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I64]);
- wasm_cg_push_memory_max_lvalue(cg, &rt, instance_local, in.memidx);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_max);
cfree_cg_push_local(cg, old_pages);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0());
cfree_cg_int_binop(cg, CFREE_CG_INT_SUB, 0);
cfree_cg_int_cmp(cg, CFREE_CG_INT_LE_U);
cfree_cg_branch_false(cg, fail);
- wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, in.memidx);
+ wasm_cg_push_instance_ptr(cg, &rt, instance_local);
cfree_cg_push_local(cg, old_pages);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0());
cfree_cg_push_local(cg, delta);
- cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0());
if (!m->memories[in.memidx].is64)
cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I64]);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_pages);
cfree_cg_push_local(cg, grow_result);
cfree_cg_push_local(cg, old_pages);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0());
if (!m->memories[in.memidx].is64)
cfree_cg_trunc(cg, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0());
cfree_cg_jump(cg, done);
cfree_cg_label_place(cg, fail);
cfree_cg_push_local(cg, grow_result);
cfree_cg_push_int(cg, UINT64_MAX, page_ty);
- cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0());
cfree_cg_label_place(cg, done);
cfree_cg_push_local(cg, grow_result);
- cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0());
break;
}
case WASM_INSN_ATOMIC_FENCE:
@@ -1593,9 +1706,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
CfreeCgMemAccess mem = wasm_cg_mem_type(ty);
mem.align = in.align;
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
- cfree_cg_addr(cg);
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
cfree_cg_atomic_load(cg, mem, CFREE_CG_MO_SEQ_CST);
break;
}
@@ -1617,13 +1728,11 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
value_tmp = cfree_cg_local(cg, ty, attrs);
cfree_cg_push_local(cg, value_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
- cfree_cg_addr(cg);
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
cfree_cg_push_local(cg, value_tmp);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_atomic_store(cg, mem, CFREE_CG_MO_SEQ_CST);
break;
}
@@ -1650,13 +1759,11 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
value_tmp = cfree_cg_local(cg, ty, attrs);
cfree_cg_push_local(cg, value_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
- cfree_cg_addr(cg);
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
cfree_cg_push_local(cg, value_tmp);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_atomic_rmw(cg, mem, wasm_atomic_rmw_op(in.kind),
CFREE_CG_MO_SEQ_CST);
break;
@@ -1675,18 +1782,16 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
expected_tmp = cfree_cg_local(cg, ty, attrs);
cfree_cg_push_local(cg, desired_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, expected_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
- cfree_cg_addr(cg);
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
cfree_cg_push_local(cg, expected_tmp);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_push_local(cg, desired_tmp);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_atomic_cmpxchg(cg, mem, CFREE_CG_MO_SEQ_CST,
CFREE_CG_MO_SEQ_CST, 0);
cfree_cg_drop(cg);
@@ -1709,31 +1814,29 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
result_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], attrs);
cfree_cg_push_local(cg, timeout_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0());
cfree_cg_push_local(cg, expected_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
- cfree_cg_addr(cg);
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
cfree_cg_atomic_load(cg, mem, CFREE_CG_MO_SEQ_CST);
cfree_cg_push_local(cg, expected_tmp);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ);
cfree_cg_branch_true(cg, equal);
cfree_cg_push_local(cg, result_tmp);
cfree_cg_push_int(cg, 1, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0());
cfree_cg_jump(cg, done);
cfree_cg_label_place(cg, equal);
(void)timeout_tmp;
cfree_cg_push_local(cg, result_tmp);
cfree_cg_push_int(cg, 2, b.id[CFREE_CG_BUILTIN_I32]);
- cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0());
cfree_cg_label_place(cg, done);
cfree_cg_push_local(cg, result_tmp);
- cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32));
+ cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0());
break;
}
case WASM_INSN_MEMORY_ATOMIC_NOTIFY: {
@@ -1745,7 +1848,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
count_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], attrs);
cfree_cg_push_local(cg, count_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, i32_mem);
+ cfree_cg_store(cg, i32_mem, wasm_cg_ea0());
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
cfree_cg_drop(cg);
cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I32]);
@@ -1771,9 +1874,8 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
mem.type = storage;
mem.align = in.align;
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
- cfree_cg_load(cg, mem);
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
if (storage != result) {
if (in.kind == WASM_INSN_I32_LOAD8_S ||
in.kind == WASM_INSN_I32_LOAD16_S ||
@@ -1811,18 +1913,17 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts,
addr_tmp = cfree_cg_local(cg, wasm_cg_type(c, b, addr_vt), attrs);
cfree_cg_push_local(cg, value_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in);
cfree_cg_push_local(cg, addr_tmp);
cfree_cg_swap(cg);
- cfree_cg_store(cg, wasm_cg_mem(c, b, addr_vt));
+ cfree_cg_store(cg, wasm_cg_mem(c, b, addr_vt), wasm_cg_ea0());
cfree_cg_push_local(cg, addr_tmp);
- cfree_cg_load(cg, wasm_cg_mem(c, b, addr_vt));
- wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx,
- in.offset64);
+ cfree_cg_load(cg, wasm_cg_mem(c, b, addr_vt), wasm_cg_ea0());
+ wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64);
cfree_cg_push_local(cg, value_tmp);
- cfree_cg_load(cg, mem);
- cfree_cg_store(cg, mem);
+ cfree_cg_load(cg, mem, wasm_cg_ea0());
+ cfree_cg_store(cg, mem, wasm_cg_ea0());
break;
}
case WASM_INSN_I32_ADD:
diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c
@@ -1197,6 +1197,10 @@ static void render_operand(AA64Asm* a, StrBuf* sb, u32 idx, int form) {
case 3: /* %aN — memory addressing form */
if (op->kind != OPK_INDIRECT)
inline_panic(a, "%a on non-memory operand");
+ /* Inline asm consumes a plain pointer-shaped address; the cg
+ * contract guarantees no EA index here. */
+ if (op->v.ind.index != REG_NONE)
+ inline_panic(a, "%a operand has unexpected EA index");
render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
return;
default:
@@ -1211,6 +1215,8 @@ static void render_operand(AA64Asm* a, StrBuf* sb, u32 idx, int form) {
render_imm(sb, op->v.imm);
return;
case OPK_INDIRECT:
+ if (op->v.ind.index != REG_NONE)
+ inline_panic(a, "inline-asm operand has unexpected EA index");
render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
return;
default:
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -156,40 +156,145 @@ static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
return obj_symbol_extern_via_got(t->c, t->obj, sym);
}
-static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
+/* Effective-address descriptor produced by addr_mode. Mirrors the
+ * Operand.v.ind shape after any required fixups (offset folded into a
+ * scratch register when out of range, GLOBAL materialized into a register).
+ * `index == REG_NONE` means plain base+offset; otherwise the indexed
+ * register-offset form should be used and ofs is always 0. */
+typedef struct AAAddrMode {
+ u32 base; /* physical register holding the base */
+ u32 index; /* physical register holding the index, or REG_NONE */
+ u32 log2_scale; /* 0..3 — only valid when index != REG_NONE */
+ i32 ofs; /* signed displacement; 0 when index != REG_NONE */
+} AAAddrMode;
+
+/* Resolve an Operand addressing form to an AAAddrMode usable by the
+ * load/store emitters. Handles all base kinds (LOCAL, INDIRECT, GLOBAL)
+ * and folds out-of-range offsets through `tmp_reg` via
+ * aa64_emit_addr_adjust, matching the prior addr_base contract.
+ *
+ * When the input INDIRECT carries an index, this routine preserves it in
+ * the result. If a nonzero displacement is also present, it is added to
+ * the base via the temp register so the indexed register-offset
+ * instruction (which encodes no displacement) can use {tmp, index, 0}. */
+static AAAddrMode addr_mode(CGTarget* t, Operand addr, u32 tmp_reg) {
AAImpl* a = impl_of(t);
+ AAAddrMode m;
+ m.base = 0u;
+ m.index = REG_NONE;
+ m.log2_scale = 0u;
+ m.ofs = 0;
+
if (addr.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_base: bad slot");
+ if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_mode: bad slot");
i32 off = -(i32)s->off;
if (off >= -256 && off <= 255) {
- *out_off = off;
- return 29;
+ m.base = 29u;
+ m.ofs = off;
+ } else {
+ aa64_emit_addr_adjust(t->mc, tmp_reg, 29u, off);
+ m.base = tmp_reg;
+ m.ofs = 0;
}
- aa64_emit_addr_adjust(t->mc, tmp_reg, 29, off);
- *out_off = 0;
- return tmp_reg;
+ return m;
}
if (addr.kind == OPK_INDIRECT) {
i32 off = addr.v.ind.ofs;
- u32 base = addr.v.ind.base & 0x1f;
- if (off >= -256 && off <= 255) {
- *out_off = off;
- return base;
+ u32 base = addr.v.ind.base & 0x1fu;
+ Reg idx = addr.v.ind.index;
+ if (idx == REG_NONE) {
+ if (off >= -256 && off <= 255) {
+ m.base = base;
+ m.ofs = off;
+ } else {
+ aa64_emit_addr_adjust(t->mc, tmp_reg, base, off);
+ m.base = tmp_reg;
+ m.ofs = 0;
+ }
+ return m;
+ }
+ /* Indexed: fold any displacement into the base so the indexed
+ * register-offset instruction can encode just {base, index, scale}. */
+ if (off != 0) {
+ aa64_emit_addr_adjust(t->mc, tmp_reg, base, off);
+ m.base = tmp_reg;
+ } else {
+ m.base = base;
}
- aa64_emit_addr_adjust(t->mc, tmp_reg, base, off);
- *out_off = 0;
- return tmp_reg;
+ m.index = (u32)idx & 0x1fu;
+ m.log2_scale = addr.v.ind.log2_scale & 0x3u;
+ m.ofs = 0;
+ return m;
}
if (addr.kind == OPK_GLOBAL) {
emit_global_addr(t, tmp_reg, addr.v.global.sym, addr.v.global.addend);
- *out_off = 0;
- return tmp_reg;
+ m.base = tmp_reg;
+ m.ofs = 0;
+ return m;
}
- compiler_panic(t->c, a->loc, "aarch64 addr_base: unsupported kind %d",
+ compiler_panic(t->c, a->loc, "aarch64 addr_mode: unsupported kind %d",
(int)addr.kind);
}
+/* Assert that an Operand consumed by a non-load/store path carries no
+ * EA index. Per doc/INDIRECT.md the cg layer never routes an indexed
+ * OPK_INDIRECT to spill/reload, bitfield, atomics, copy_bytes/set_bytes,
+ * inline asm, or addr_of; the assert catches upstream misrouting before
+ * it silently produces incorrect addressing. */
+static inline void aa_assert_no_index(CGTarget* t, Operand addr,
+ const char* where) {
+ if (addr.kind == OPK_INDIRECT && addr.v.ind.index != REG_NONE) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "aarch64 %s: OPK_INDIRECT with index unexpected", where);
+ }
+}
+
+/* LDR (register), option=011 (LSL, 64-bit Xm index). Encodes
+ * LDR<size> Wt|Xt, [Xn, Xm{, LSL #amt}] (integer)
+ * where size in {0..3} selects byte/half/word/double; opc=01 (load).
+ * S=0 -> no shift (amt=0); S=1 -> shift by `size` (amt=size).
+ * The aarch64 register-offset addressing mode supports only those two
+ * shift amounts (other values must be lowered upstream). */
+static inline u32 aa64_ldr_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) {
+ return 0x38606800u | (size << 30) | ((Rm & 0x1fu) << 16) |
+ ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu);
+}
+static inline u32 aa64_str_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) {
+ return 0x38206800u | (size << 30) | ((Rm & 0x1fu) << 16) |
+ ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu);
+}
+static inline u32 aa64_ldr_fp_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) {
+ return 0x3C606800u | (size << 30) | ((Rm & 0x1fu) << 16) |
+ ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu);
+}
+static inline u32 aa64_str_fp_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) {
+ return 0x3C206800u | (size << 30) | ((Rm & 0x1fu) << 16) |
+ ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu);
+}
+/* 128-bit Q register-offset variants: size=00 with opc<1>=1 selects
+ * 128b (opc=11 for the load, opc=10 for the store). */
+static inline u32 aa64_ldr_q_reg(u32 Rt, u32 Rn, u32 Rm, u32 S) {
+ return 0x3CE06800u | ((Rm & 0x1fu) << 16) | ((S & 1u) << 12) |
+ ((Rn & 0x1fu) << 5) | (Rt & 0x1fu);
+}
+static inline u32 aa64_str_q_reg(u32 Rt, u32 Rn, u32 Rm, u32 S) {
+ return 0x3CA06800u | ((Rm & 0x1fu) << 16) | ((S & 1u) << 12) |
+ ((Rn & 0x1fu) << 5) | (Rt & 0x1fu);
+}
+
+/* True if `log2_scale` is legal for the aarch64 register-offset form at
+ * a given access size index (sidx). The encoding supports S=0 (LSL #0)
+ * and S=1 (LSL #sidx) — any other scale must be lowered by adding
+ * `index << log2_scale` into the base via arch_lower_indexed before the
+ * load/store. */
+static inline int aa_indexed_scale_legal(u32 sidx, u32 log2_scale,
+ u32* S_out) {
+ if (log2_scale == 0u) { *S_out = 0u; return 1; }
+ if (log2_scale == sidx) { *S_out = 1u; return 1; }
+ return 0;
+}
+
void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
u32 sidx = size_idx_for_bytes(sz);
@@ -222,12 +327,35 @@ void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
return;
}
- i32 off;
- u32 base = addr_base(t, addr, &off, AA_TMP0);
+ /* Indexed register-offset form: emit `LDR Rt, [Rn, Rm{, LSL #s}]` when
+ * the EA carries an index and the scale matches the encoding (S=0 →
+ * LSL #0, S=1 → LSL #sidx). Otherwise fall back to
+ * arch_lower_indexed, which materializes base+(index<<scale) into a
+ * scratch and gives us a plain base+disp shape. */
+ if (addr.kind == OPK_INDIRECT && addr.v.ind.index != REG_NONE) {
+ u32 S;
+ if (aa_indexed_scale_legal(sidx, addr.v.ind.log2_scale & 0x3u, &S)) {
+ AAAddrMode m = addr_mode(t, addr, AA_TMP0);
+ if (dst.cls == RC_FP) {
+ if (sidx == 4u)
+ aa64_emit32(t->mc, aa64_ldr_q_reg(reg_num(dst), m.base, m.index, S));
+ else
+ aa64_emit32(t->mc,
+ aa64_ldr_fp_reg(sidx, reg_num(dst), m.base, m.index, S));
+ } else {
+ aa64_emit32(t->mc,
+ aa64_ldr_reg(sidx, reg_num(dst), m.base, m.index, S));
+ }
+ return;
+ }
+ addr = arch_lower_indexed(t, addr, AA_TMP0);
+ }
+
+ AAAddrMode m = addr_mode(t, addr, AA_TMP0);
if (dst.cls == RC_FP) {
- aa_emit_ldr_fp_any(t->mc, sidx, reg_num(dst), base, off);
+ aa_emit_ldr_fp_any(t->mc, sidx, reg_num(dst), m.base, m.ofs);
} else {
- aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off));
+ aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), m.base, m.ofs));
}
}
@@ -243,7 +371,13 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
u32 src_reg;
u32 src_is_fp = 0;
- if (src.kind == OPK_IMM) {
+ /* Zero immediate stores use wzr/xzr directly (reg 31). Avoids a
+ * separate `mov wN, #0` and frees AA_TMP0 for the address base. */
+ int src_imm_zero = (src.kind == OPK_IMM && src.v.imm == 0 &&
+ src.cls != RC_FP);
+ if (src_imm_zero) {
+ src_reg = 31u;
+ } else if (src.kind == OPK_IMM) {
u32 sf = (sz == 8) ? 1u : 0u;
aa64_emit_load_imm(mc, sf, AA_TMP0, src.v.imm);
src_reg = AA_TMP0;
@@ -253,7 +387,7 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
} else {
src_reg = reg_num(src);
}
- u32 base = (src.kind == OPK_IMM) ? AA_TMP1 : AA_TMP0;
+ u32 base = (src.kind == OPK_IMM && !src_imm_zero) ? AA_TMP1 : AA_TMP0;
if (use_got_for_sym(t, sym)) {
aa64_emit_got_load_addr(t, base, sym);
if (src_is_fp) {
@@ -277,20 +411,60 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
return;
}
- i32 off;
- u32 addr_tmp = (src.kind == OPK_IMM) ? AA_TMP1 : AA_TMP0;
- u32 base = addr_base(t, addr, &off, addr_tmp);
+ /* Zero immediate stores use wzr/xzr directly (reg 31). */
+ int src_imm_zero = (src.kind == OPK_IMM && src.v.imm == 0 &&
+ src.cls != RC_FP);
+ u32 addr_tmp =
+ (src.kind == OPK_IMM && !src_imm_zero) ? AA_TMP1 : AA_TMP0;
+
+ /* Indexed register-offset form for STR when the EA's scale is legal.
+ * Falls back to arch_lower_indexed when LSL doesn't fit the
+ * instruction encoding (e.g. byte access with log2_scale=3). */
+ if (addr.kind == OPK_INDIRECT && addr.v.ind.index != REG_NONE) {
+ u32 S;
+ if (aa_indexed_scale_legal(sidx, addr.v.ind.log2_scale & 0x3u, &S)) {
+ AAAddrMode m = addr_mode(t, addr, addr_tmp);
+ u32 src_reg;
+ if (src_imm_zero) {
+ src_reg = 31u;
+ } else if (src.kind == OPK_IMM) {
+ u32 sf = (sz == 8) ? 1u : 0u;
+ aa64_emit_load_imm(t->mc, sf, AA_TMP0, src.v.imm);
+ src_reg = AA_TMP0;
+ } else {
+ src_reg = reg_num(src);
+ }
+ if (src.cls == RC_FP && !src_imm_zero) {
+ if (sidx == 4u)
+ aa64_emit32(t->mc, aa64_str_q_reg(src_reg, m.base, m.index, S));
+ else
+ aa64_emit32(t->mc,
+ aa64_str_fp_reg(sidx, src_reg, m.base, m.index, S));
+ } else {
+ aa64_emit32(t->mc,
+ aa64_str_reg(sidx, src_reg, m.base, m.index, S));
+ }
+ return;
+ }
+ addr = arch_lower_indexed(t, addr, addr_tmp);
+ }
+
+ AAAddrMode m = addr_mode(t, addr, addr_tmp);
+ if (src_imm_zero) {
+ aa64_emit32(t->mc, aa64_stur(sidx, 31u, m.base, m.ofs));
+ return;
+ }
if (src.kind == OPK_IMM) {
u32 sf = (sz == 8) ? 1u : 0u;
aa64_emit_load_imm(t->mc, sf, AA_TMP0, src.v.imm);
- aa64_emit32(t->mc, aa64_stur(sidx, AA_TMP0, base, off));
+ aa64_emit32(t->mc, aa64_stur(sidx, AA_TMP0, m.base, m.ofs));
return;
}
if (src.cls == RC_FP) {
- aa_emit_str_fp_any(t->mc, sidx, reg_num(src), base, off);
+ aa_emit_str_fp_any(t->mc, sidx, reg_num(src), m.base, m.ofs);
} else {
- aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off));
+ aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), m.base, m.ofs));
}
}
@@ -303,6 +477,7 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) {
return;
}
if (lv.kind == OPK_INDIRECT) {
+ aa_assert_no_index(t, lv, "addr_of");
i32 ofs = lv.v.ind.ofs;
u32 base = lv.v.ind.base & 0x1f;
aa64_emit_addr_adjust(t->mc, reg_num(dst), base, ofs);
@@ -454,6 +629,20 @@ static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
aa64_emit_addr_adjust(t->mc, scratch, 29, -(i32)s->off);
return scratch;
}
+ if (op.kind == OPK_GLOBAL) {
+ emit_global_addr(t, scratch, op.v.global.sym, op.v.global.addend);
+ return scratch;
+ }
+ if (op.kind == OPK_INDIRECT) {
+ /* Aggregate helpers (copy_bytes/set_bytes, bitfield_*) take plain
+ * pointer addresses; the cg contract guarantees no EA index here. */
+ aa_assert_no_index(t, op, "agg address");
+ u32 base = op.v.ind.base & 0x1fu;
+ i32 ofs = op.v.ind.ofs;
+ if (ofs == 0) return base;
+ aa64_emit_addr_adjust(t->mc, scratch, base, ofs);
+ return scratch;
+ }
compiler_panic(t->c, impl_of(t)->loc,
"aarch64 agg: address kind %d unsupported", (int)op.kind);
}
@@ -917,6 +1106,7 @@ static Operand aa_call_stack_arg_addr(CGTarget* t, u32 stack_offset,
addr.kind = OPK_INDIRECT;
addr.cls = RC_INT;
addr.v.ind.base = tail && !a->omit_frame ? 29u : 31u;
+ addr.v.ind.index = REG_NONE;
addr.v.ind.ofs = (i32)stack_offset;
if (tail && !a->omit_frame) addr.v.ind.ofs += 16;
return addr;
@@ -1064,14 +1254,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
break;
}
case OPK_INDIRECT: {
+ aa_assert_no_index(t, av->storage, "call INT arg storage");
Operand src;
memset(&src, 0, sizeof src);
src.kind = OPK_INDIRECT;
src.v.ind.base = av->storage.v.ind.base;
+ src.v.ind.index = REG_NONE;
src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
- i32 off;
- u32 base = addr_base(t, src, &off, AA_TMP0);
- aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off));
+ AAAddrMode m = addr_mode(t, src, AA_TMP0);
+ aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, m.base, m.ofs));
break;
}
default:
@@ -1106,14 +1297,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
break;
}
case OPK_INDIRECT: {
+ aa_assert_no_index(t, av->storage, "call FP arg storage");
Operand src;
memset(&src, 0, sizeof src);
src.kind = OPK_INDIRECT;
src.v.ind.base = av->storage.v.ind.base;
+ src.v.ind.index = REG_NONE;
src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
- i32 off;
- u32 base = addr_base(t, src, &off, AA_TMP0);
- aa_emit_ldr_fp_any(t->mc, sidx, dst_reg, base, off);
+ AAAddrMode m = addr_mode(t, src, AA_TMP0);
+ aa_emit_ldr_fp_any(t->mc, sidx, dst_reg, m.base, m.ofs);
break;
}
default:
@@ -1137,14 +1329,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
break;
}
case OPK_INDIRECT: {
+ aa_assert_no_index(t, av->storage, "call FP stack-arg storage");
Operand src;
memset(&src, 0, sizeof src);
src.kind = OPK_INDIRECT;
src.v.ind.base = av->storage.v.ind.base;
+ src.v.ind.index = REG_NONE;
src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
- i32 off;
- u32 base = addr_base(t, src, &off, AA_TMP0);
- aa_emit_ldr_fp_any(t->mc, sidx, AA_FP_TMP0, base, off);
+ AAAddrMode m = addr_mode(t, src, AA_TMP0);
+ aa_emit_ldr_fp_any(t->mc, sidx, AA_FP_TMP0, m.base, m.ofs);
aa_store_stack_reg(t, AA_FP_TMP0, RC_FP, av->type, sz,
*stack_off, tail);
break;
@@ -1439,6 +1632,7 @@ static Operand aa_call_plan_offset_operand(CGTarget* t, Operand op,
u32 offset) {
if (!offset) return op;
if (op.kind == OPK_INDIRECT) {
+ aa_assert_no_index(t, op, "call plan offset operand");
op.v.ind.ofs += (i32)offset;
} else if (op.kind == OPK_LOCAL) {
AAImpl* a = impl_of(t);
@@ -1446,6 +1640,8 @@ static Operand aa_call_plan_offset_operand(CGTarget* t, Operand op,
if (!s) compiler_panic(t->c, a->loc, "aarch64 call plan: bad slot");
op.kind = OPK_INDIRECT;
op.v.ind.base = 29;
+ op.v.ind.index = REG_NONE;
+ op.v.ind.log2_scale = 0;
op.v.ind.ofs = -(i32)s->off + (i32)offset;
}
return op;
@@ -1931,6 +2127,7 @@ static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
MCEmitter* mc = t->mc;
u32 sf = (ma.size == 8) ? 1u : 0u;
+ aa_assert_no_index(t, addr, "atomic_load");
u32 base;
if (addr.kind == OPK_REG) {
base = reg_num(addr);
@@ -1939,6 +2136,14 @@ static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot");
base = AA_TMP0;
aa64_emit_addr_adjust(mc, base, 29, -(i32)s->off);
+ } else if (addr.kind == OPK_INDIRECT) {
+ AAAddrMode m = addr_mode(t, addr, AA_TMP0);
+ if (m.ofs != 0) {
+ aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs);
+ base = AA_TMP0;
+ } else {
+ base = m.base;
+ }
} else {
compiler_panic(t->c, a->loc,
"aarch64 atomic_load: addr kind %d unsupported",
@@ -1969,6 +2174,7 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src,
"aarch64 atomic_store: src kind %d unsupported",
(int)src.kind);
}
+ aa_assert_no_index(t, addr, "atomic_store");
u32 base;
if (addr.kind == OPK_REG) {
base = reg_num(addr);
@@ -1977,6 +2183,14 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src,
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot");
base = AA_TMP0;
aa64_emit_addr_adjust(mc, base, 29, -(i32)s->off);
+ } else if (addr.kind == OPK_INDIRECT) {
+ AAAddrMode m = addr_mode(t, addr, AA_TMP0);
+ if (m.ofs != 0) {
+ aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs);
+ base = AA_TMP0;
+ } else {
+ base = m.base;
+ }
} else {
compiler_panic(t->c, a->loc,
"aarch64 atomic_store: addr kind %d unsupported",
@@ -2015,6 +2229,7 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
MCEmitter* mc = t->mc;
u32 sf = (ma.size == 8) ? 1u : 0u;
+ aa_assert_no_index(t, addr, "atomic_rmw");
u32 base = AA_TMP0;
if (addr.kind == OPK_REG) {
aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(addr)));
@@ -2022,6 +2237,10 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot");
aa64_emit_addr_adjust(mc, AA_TMP0, 29, -(i32)s->off);
+ } else if (addr.kind == OPK_INDIRECT) {
+ AAAddrMode m = addr_mode(t, addr, AA_TMP0);
+ if (m.base != AA_TMP0 || m.ofs != 0)
+ aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs);
} else {
compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported",
(int)addr.kind);
@@ -2068,6 +2287,7 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
u32 sf = (ma.size == 8) ? 1u : 0u;
(void)fail;
+ aa_assert_no_index(t, addr, "atomic_cas");
u32 base = AA_TMP0;
if (addr.kind == OPK_REG)
aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(addr)));
@@ -2075,6 +2295,10 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot");
aa64_emit_addr_adjust(mc, AA_TMP0, 29, -(i32)s->off);
+ } else if (addr.kind == OPK_INDIRECT) {
+ AAAddrMode m = addr_mode(t, addr, AA_TMP0);
+ if (m.base != AA_TMP0 || m.ofs != 0)
+ aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs);
} else {
compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported",
(int)addr.kind);
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -302,6 +302,8 @@ typedef struct Operand {
} global;
struct {
Reg base;
+ Reg index; /* REG_NONE when no index operand */
+ u8 log2_scale; /* 0..3 -> 1/2/4/8 bytes; ignored when index == REG_NONE */
i32 ofs;
} ind;
} v;
@@ -1008,6 +1010,13 @@ CGTarget* cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*);
void cgtarget_finalize(CGTarget*);
void cgtarget_free(CGTarget*);
+/* Helper for backends without a native indexed addressing mode. If addr has
+ * an index (addr.v.ind.index != REG_NONE), materializes
+ * base + (index << log2_scale) into `scratch` and returns a plain
+ * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller
+ * supplies the scratch register from its scratch pool. */
+Operand arch_lower_indexed(CGTarget*, Operand addr, Reg scratch);
+
/* ---- Disassembler hook ----
* Bytes -> records, not frontend-driven lowering, so this is a separate
* hook from CGTarget/MCEmitter. The internal implementation may share
diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c
@@ -421,6 +421,60 @@ static void c_emit_imm_literal(CTarget* t, i64 v) {
cbuf_put_i64(&t->body, v);
}
+/* Address-mode tuple decoded from an OPK_INDIRECT operand. Mirrors the
+ * `addr_mode` helper in the machine-code backends so all targets share a
+ * single in-backend view of `base [+ index << log2_scale] + ofs`.
+ * Populated field-for-field from `Operand.v.ind` by c_addr_mode. */
+typedef struct CAddrMode {
+ Reg base;      /* register holding the base pointer */
+ Reg index; /* REG_NONE when no index operand */
+ u8 log2_scale; /* meaningful only when index != REG_NONE */
+ i32 ofs;       /* signed byte displacement; 0 means "no displacement term" */
+} CAddrMode;
+
+/* Decode the `v.ind` payload of `addr` into a CAddrMode. The operand kind is
+ * not checked here; callers only pass OPK_INDIRECT operands. */
+static CAddrMode c_addr_mode(Operand addr) {
+  return (CAddrMode){
+      .base = addr.v.ind.base,
+      .index = addr.v.ind.index,
+      .log2_scale = addr.v.ind.log2_scale,
+      .ofs = addr.v.ind.ofs,
+  };
+}
+
+/* Append the C address expression for `m` to the function body:
+ *
+ *   (char*)<base> [+ (uintptr_t)<index> * <1 << log2_scale>] [+ <ofs>]
+ *
+ * The index term is omitted when m.index == REG_NONE and the displacement
+ * term is omitted when m.ofs == 0. No surrounding parentheses or cast are
+ * emitted; every OPK_INDIRECT renderer wraps the result itself with
+ * `(*(T*)(...))` or `((T)(...))`. */
+static void c_emit_indirect_addr_expr(CTarget* t, CAddrMode m) {
+  char name[24];
+
+  /* Base term is always present. */
+  c_reg_name(m.base, name, sizeof name);
+  cbuf_puts(&t->body, "(char*)");
+  cbuf_puts(&t->body, name);
+
+  if (m.index != REG_NONE) {
+    /* The scale is written as its literal value (1/2/4/8); cg normalizes
+     * log2_scale to {0,1,2,3}. */
+    u64 scale = (u64)(1u << m.log2_scale);
+    c_reg_name(m.index, name, sizeof name);
+    cbuf_puts(&t->body, " + (uintptr_t)");
+    cbuf_puts(&t->body, name);
+    cbuf_puts(&t->body, " * ");
+    cbuf_put_u64(&t->body, scale);
+  }
+
+  if (m.ofs == 0) return;
+  cbuf_puts(&t->body, " + ");
+  cbuf_put_i64(&t->body, (i64)m.ofs);
+}
+
+/* Panic if `addr` is an OPK_INDIRECT that carries an index register.
+ * Non-INDIRECT operands and un-indexed INDIRECT operands pass through
+ * silently. Called from paths the cg layer guarantees never see the indexed
+ * shape (bitfield, atomics, copy_bytes/set_bytes, inline asm); `where` names
+ * the calling path in the diagnostic. */
+static void c_assert_no_index(CTarget* t, Operand addr, const char* where) {
+  SrcLoc loc;
+
+  if (addr.kind != OPK_INDIRECT || addr.v.ind.index == REG_NONE) return;
+
+  /* Report at the current function's location when one is active. */
+  if (t->cur_fn) {
+    loc = t->cur_fn->loc;
+  } else {
+    loc = (SrcLoc){0, 0, 0};
+  }
+  compiler_panic(t->c, loc,
+                 "C target: %s: indexed OPK_INDIRECT not allowed here", where);
+}
+
void c_emit_operand(CTarget* t, Operand op) {
char buf[24];
switch (op.kind) {
@@ -467,18 +521,12 @@ void c_emit_operand(CTarget* t, Operand op) {
}
case OPK_INDIRECT: {
/* Used by call paths to pass aggregates by-address: the operand's type
- * is the aggregate, the storage is (base + offset). Emit the deref as
- * a value expression. */
+ * is the aggregate, the storage is `base + index*scale + ofs`. Emit the
+ * deref as a value expression. */
cbuf_puts(&t->body, "(*(");
c_emit_type(t, &t->body, op.type);
- cbuf_puts(&t->body, "*)((char*)");
- char rbuf[24];
- c_reg_name(op.v.ind.base, rbuf, sizeof rbuf);
- cbuf_puts(&t->body, rbuf);
- if (op.v.ind.ofs != 0) {
- cbuf_puts(&t->body, " + ");
- cbuf_put_i64(&t->body, (i64)op.v.ind.ofs);
- }
+ cbuf_puts(&t->body, "*)(");
+ c_emit_indirect_addr_expr(t, c_addr_mode(op));
cbuf_puts(&t->body, "))");
return;
}
@@ -777,29 +825,24 @@ static void c_emit_addr_deref(CTarget* t, Operand addr, CfreeCgTypeId access_typ
return;
}
case OPK_INDIRECT: {
- Operand base_reg;
- base_reg.kind = OPK_REG;
- base_reg.cls = RC_INT;
- base_reg.type = 0;
- base_reg.v.reg = addr.v.ind.base;
+ CAddrMode m = c_addr_mode(addr);
/* Ensure the base reg is declared. We can't readily look up its type
* post-hoc, so reuse whatever it was first declared with. */
- if ((u32)addr.v.ind.base >= t->reg_cap ||
- !t->reg_declared[addr.v.ind.base]) {
+ if ((u32)m.base >= t->reg_cap || !t->reg_declared[m.base]) {
compiler_panic(t->c, loc,
"C target: indirect on undeclared base reg v%u",
- (unsigned)addr.v.ind.base);
+ (unsigned)m.base);
+ }
+ if (m.index != REG_NONE &&
+ ((u32)m.index >= t->reg_cap || !t->reg_declared[m.index])) {
+ compiler_panic(t->c, loc,
+ "C target: indirect on undeclared index reg v%u",
+ (unsigned)m.index);
}
cbuf_puts(&t->body, "(*(");
c_emit_type(t, &t->body, access_type);
- cbuf_puts(&t->body, "*)((char*)");
- char rbuf[24];
- c_reg_name(addr.v.ind.base, rbuf, sizeof rbuf);
- cbuf_puts(&t->body, rbuf);
- if (addr.v.ind.ofs != 0) {
- cbuf_puts(&t->body, " + ");
- cbuf_put_i64(&t->body, (i64)addr.v.ind.ofs);
- }
+ cbuf_puts(&t->body, "*)(");
+ c_emit_indirect_addr_expr(t, m);
cbuf_puts(&t->body, "))");
return;
}
@@ -842,13 +885,8 @@ static void c_emit_lvalue_addr(CTarget* t, Operand lv, CfreeCgTypeId dst_type) {
case OPK_INDIRECT: {
cbuf_puts(&t->body, "((");
c_emit_type(t, &t->body, dst_type);
- cbuf_puts(&t->body, ")((char*)");
- c_reg_name(lv.v.ind.base, buf, sizeof buf);
- cbuf_puts(&t->body, buf);
- if (lv.v.ind.ofs != 0) {
- cbuf_puts(&t->body, " + ");
- cbuf_put_i64(&t->body, (i64)lv.v.ind.ofs);
- }
+ cbuf_puts(&t->body, ")(");
+ c_emit_indirect_addr_expr(t, c_addr_mode(lv));
cbuf_puts(&t->body, "))");
return;
}
@@ -2606,6 +2644,8 @@ void c_va_arg(CGTarget* T, Operand dst, Operand ap_addr, CfreeCgTypeId ty) {
void c_copy_bytes(CGTarget* T, Operand dst_addr, Operand src_addr,
AggregateAccess m) {
CTarget* t = (CTarget*)T;
+ c_assert_no_index(t, dst_addr, "copy_bytes dst");
+ c_assert_no_index(t, src_addr, "copy_bytes src");
cbuf_puts(&t->body, " __builtin_memcpy(");
c_emit_operand(t, dst_addr);
cbuf_puts(&t->body, ", ");
@@ -2618,6 +2658,7 @@ void c_copy_bytes(CGTarget* T, Operand dst_addr, Operand src_addr,
void c_set_bytes(CGTarget* T, Operand dst_addr, Operand byte_value,
AggregateAccess m) {
CTarget* t = (CTarget*)T;
+ c_assert_no_index(t, dst_addr, "set_bytes dst");
cbuf_puts(&t->body, " __builtin_memset(");
c_emit_operand(t, dst_addr);
cbuf_puts(&t->body, ", (int)");
@@ -2700,6 +2741,7 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr,
if (dst.kind != OPK_REG) {
compiler_panic(t->c, loc, "C target: bitfield_load dst must be REG");
}
+ c_assert_no_index(t, addr, "bitfield_load");
if (bf.bit_width == 0) {
/* Zero-width — layout barrier only; nothing to load. Emit a no-op
* assignment so the dst reg still gets a defined value. */
@@ -2771,6 +2813,7 @@ void c_bitfield_store(CGTarget* T, Operand addr, Operand src,
BitFieldAccess bf) {
CTarget* t = (CTarget*)T;
SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
+ c_assert_no_index(t, addr, "bitfield_store");
if (bf.bit_width == 0) return; /* zero-width: no-op */
const char* sty = c_bf_storage_type(bf.storage.size);
if (!sty) {
@@ -2860,6 +2903,8 @@ void c_asm_block(CGTarget* T, const char* tmpl, const AsmConstraint* outs,
u32 no, Operand* oo, const AsmConstraint* ins, u32 ni,
const Operand* io, const Sym* clobs, u32 nc) {
CTarget* t = (CTarget*)T;
+ for (u32 i = 0; i < no; ++i) c_assert_no_index(t, oo[i], "asm_block out");
+ for (u32 i = 0; i < ni; ++i) c_assert_no_index(t, io[i], "asm_block in");
cbuf_puts(&t->body, " __asm__ __volatile__ (");
c_emit_c_string_literal(&t->body, tmpl ? tmpl : "");
/* Outputs. */
@@ -2992,6 +3037,7 @@ void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m,
MemOrder o) {
CTarget* t = (CTarget*)T;
(void)m;
+ c_assert_no_index(t, addr, "atomic_load");
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
/* __atomic_load_n returns a value of the pointed-to type (dst.type). */
c_emit_reg_assign_open(t, dst.v.reg, dst.type);
@@ -3009,6 +3055,7 @@ void c_atomic_store(CGTarget* T, Operand addr, Operand src, MemAccess m,
MemOrder o) {
CTarget* t = (CTarget*)T;
(void)m;
+ c_assert_no_index(t, addr, "atomic_store");
cbuf_puts(&t->body, " __atomic_store_n((");
c_emit_type(t, &t->body, src.type);
cbuf_puts(&t->body, "*)");
@@ -3038,6 +3085,7 @@ void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr,
CTarget* t = (CTarget*)T;
(void)m;
SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
+ c_assert_no_index(t, addr, "atomic_rmw");
const char* fn = c_atomic_op_builtin(op);
if (!fn) {
compiler_panic(t->c, loc, "C target: unknown atomic op %d", (int)op);
@@ -3063,6 +3111,7 @@ void c_atomic_cas(CGTarget* T, Operand prior, Operand ok, Operand addr,
MemOrder so, MemOrder fo) {
CTarget* t = (CTarget*)T;
(void)m;
+ c_assert_no_index(t, addr, "atomic_cas");
/* gcc's __atomic_compare_exchange_n needs a real lvalue holding the
* expected value (it's updated on failure). We can't use `&prior_reg`
* directly because CG reuses reg ids across types — the C declaration may
diff --git a/src/arch/cgtarget.c b/src/arch/cgtarget.c
@@ -3,7 +3,10 @@
* The lifecycle helpers (cgtarget_finalize, cgtarget_free) are arch-agnostic
* shims over the vtable. */
+#include <string.h>
+
#include "arch/arch.h"
+#include "cg/type.h"
CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
const ArchImpl* arch = arch_for_compiler(c);
@@ -25,3 +28,77 @@ void cgtarget_free(CGTarget* t) {
if (!t) return;
/* Arena-backed; nothing to free. */
}
+
+/* Build an RC_INT OPK_REG operand of type `ty` that names register `r`.
+ * All other operand bytes are zeroed. */
+static Operand lower_indexed_reg_operand(Reg r, CfreeCgTypeId ty) {
+  Operand op;
+  memset(&op, 0, sizeof op);
+  op.kind = OPK_REG;
+  op.cls = RC_INT;
+  op.type = ty;
+  op.v.reg = r;
+  return op;
+}
+
+/* Default fold for backends without a native indexed addressing mode.
+ *
+ * If `addr` carries an index register (addr.v.ind.index != REG_NONE),
+ * materialize `base + (index << log2_scale)` into `scratch` through the
+ * target's own `binop` hook and return a plain OPK_INDIRECT(scratch, ofs)
+ * with `index == REG_NONE`. Otherwise return `addr` unchanged.
+ *
+ * The caller supplies `scratch` from its own scratch pool and is responsible
+ * for freeing it after the memop completes. `scratch` must not alias the
+ * base register when log2_scale != 0: the shifted index is written to
+ * `scratch` before the base is added.
+ *
+ * The returned operand is derived from `addr`, so it keeps the caller's
+ * `type` and `cls`: backends read the access size and signedness from the
+ * address operand's type, and those must survive the fold. Only the
+ * base/index/log2_scale fields are rewritten; `ofs` is carried over as-is. */
+Operand arch_lower_indexed(CGTarget* t, Operand addr, Reg scratch) {
+  Operand scratch_op;
+  Operand base_op;
+  Operand index_op;
+  Operand out;
+  CfreeCgTypeId ty;
+  u8 log2;
+
+  if (addr.kind != OPK_INDIRECT || addr.v.ind.index == REG_NONE) return addr;
+
+  /* The address arithmetic itself is always done as 64-bit integer math. */
+  ty = builtin_id(CFREE_CG_BUILTIN_I64);
+  log2 = addr.v.ind.log2_scale;
+  scratch_op = lower_indexed_reg_operand(scratch, ty);
+  base_op = lower_indexed_reg_operand(addr.v.ind.base, ty);
+  index_op = lower_indexed_reg_operand(addr.v.ind.index, ty);
+
+  if (log2 == 0) {
+    /* index * 1: just add the index directly to the base. */
+    t->binop(t, BO_IADD, scratch_op, base_op, index_op);
+  } else {
+    Operand shamt;
+    memset(&shamt, 0, sizeof shamt);
+    shamt.kind = OPK_IMM;
+    shamt.cls = RC_INT;
+    shamt.type = ty;
+    shamt.v.imm = (i64)log2;
+    /* scratch = index << log2 */
+    t->binop(t, BO_SHL, scratch_op, index_op, shamt);
+    /* scratch = base + scratch */
+    t->binop(t, BO_IADD, scratch_op, base_op, scratch_op);
+  }
+
+  /* Start from the caller's operand so its type/cls are preserved. */
+  out = addr;
+  out.v.ind.base = scratch;
+  out.v.ind.index = REG_NONE;
+  out.v.ind.log2_scale = 0;
+  return out;
+}
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -398,7 +398,12 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
rs1 = parse_xreg(d);
return enc_i(m, 0u, rs1, 0);
}
- rd = parse_xreg(d); expect_comma(d);
+ rd = parse_xreg(d);
+ if (!asm_driver_eat_comma(d)) {
+ if (!strcmp(desc->mnemonic, "jalr"))
+ return enc_i(m, RV_RA, rd, 0);
+ asm_driver_panic(d, "rv64 asm: expected ','");
+ }
/* Accept both `jalr rd, imm(rs1)` and `jalr rd, rs1, imm`. */
{
AsmTok t = asm_driver_peek(d);
@@ -732,6 +737,9 @@ static void render_operand(Rv64Asm* a, StrBuf* sb, u32 idx, int form) {
case 3: /* %aN — memory addressing form */
if (op->kind != OPK_INDIRECT)
inline_panic(a, "%a on non-memory operand");
+ if (op->v.ind.index != REG_NONE)
+ inline_panic(a, "%a on indexed memory operand: rv64 inline asm "
+ "requires base+disp only");
render_indirect(a, sb, op->v.ind.base, op->v.ind.ofs);
return;
case 4: /* %zN — zero-or-reg */
@@ -757,6 +765,9 @@ static void render_operand(Rv64Asm* a, StrBuf* sb, u32 idx, int form) {
render_imm(sb, op->v.imm);
return;
case OPK_INDIRECT:
+ if (op->v.ind.index != REG_NONE)
+ inline_panic(a, "indexed memory operand in inline asm: rv64 requires "
+ "base+disp only");
render_indirect(a, sb, op->v.ind.base, op->v.ind.ofs);
return;
default:
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -166,7 +166,21 @@ void rv_load(CGTarget *t, Operand dst, Operand addr, MemAccess ma);
void rv_store(CGTarget *t, Operand addr, Operand src, MemAccess ma);
u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off);
u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off);
-u32 addr_base(CGTarget *t, Operand addr, i32 *out_off, u32 tmp_reg);
+
+/* Effective-address tuple returned by addr_mode: `base + (index << log2_scale)
+ * + ofs`, where `index == REG_NONE` means no index operand. rv64 has no
+ * indexed load/store instructions even with Zba, so load/store fold any
+ * index into a scratch register up front via Zba `sh{1,2,3}add` (see
+ * rv_fold_indexed in ops.c); other paths (atomics, spill/reload, ...)
+ * assert that input OPK_INDIRECT operands already have `index == REG_NONE`. */
+typedef struct RvAddrMode {
+ u32 base;       /* base register number (frame pointer, operand base, or tmp_reg) */
+ u32 index;      /* addr_mode always returns REG_NONE here */
+ u8 log2_scale;  /* only meaningful alongside an index; 0 from addr_mode */
+ i32 ofs;        /* signed displacement; addr_mode keeps it within imm12 or 0 */
+} RvAddrMode;
+
+/* Resolve an OPK_LOCAL or un-indexed OPK_INDIRECT operand to base + ofs.
+ * `tmp_reg` is clobbered (and returned as the base with ofs == 0) when the
+ * raw offset does not fit the signed 12-bit immediate. */
+RvAddrMode addr_mode(CGTarget *t, Operand addr, u32 tmp_reg);
void rv64_emit_addr_adjust(MCEmitter *mc, u32 rd, u32 base, i32 off);
ObjSymId emit_pcrel_anchor(CGTarget *t, u32 sec, u32 auipc_pos);
void rv64_emit_got_load_addr(CGTarget *t, u32 dst_reg, ObjSymId sym);
diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h
@@ -171,6 +171,15 @@ static inline u32 rv_divuw(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x01, rs2, r
static inline u32 rv_remw(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x01, rs2, rs1, 0x6, rd, RV_OP_32); }
static inline u32 rv_remuw(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x01, rs2, rs1, 0x7, rd, RV_OP_32); }
+/* Zba (address-generation) subset — assumed available on rv64 targets.
+ * SH{1,2,3}ADD rd, rs1, rs2 computes rd = (rs1 << {1,2,3}) + rs2 in one
+ * instruction (funct7=0x10, opcode=OP). Used by load/store to fold an
+ * indexed effective address `base + (index << log2_scale)` into a single
+ * scratch register without an explicit shift+add pair.
+ * Note the operand order: rs1 is the value that gets shifted, so callers
+ * pass the index as rs1 and the base as rs2. The three encoders differ only
+ * in the fourth rv_r argument (0x2 / 0x4 / 0x6). */
+static inline u32 rv_sh1add(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x10, rs2, rs1, 0x2, rd, RV_OP); }
+static inline u32 rv_sh2add(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x10, rs2, rs1, 0x4, rd, RV_OP); }
+static inline u32 rv_sh3add(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x10, rs2, rs1, 0x6, rd, RV_OP); }
+
/* Loads (funct3: 0=LB,1=LH,2=LW,3=LD,4=LBU,5=LHU,6=LWU) */
static inline u32 rv_lb(u32 rd, u32 rs1, i32 imm) { return rv_i(imm, rs1, 0x0, rd, RV_LOAD); }
static inline u32 rv_lh(u32 rd, u32 rs1, i32 imm) { return rv_i(imm, rs1, 0x1, rd, RV_LOAD); }
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -129,38 +129,57 @@ static void rv_copy(CGTarget* t, Operand dst, Operand src) {
/* ---- address resolution ---- */
-/* Materialize the address of `addr` (LOCAL or INDIRECT or GLOBAL) into
- * `tmp_reg`. Returns the register holding the base and writes the
- * effective signed offset to *out_off (0 when we synthesized into tmp).
- * For OPK_GLOBAL, emits AUIPC + an LO12 reloc on the caller's load/store. */
-u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
+/* Materialize the address of `addr` (LOCAL or INDIRECT) into a
+ * base-register + signed-offset pair, possibly using `tmp_reg` when the
+ * raw offset exceeds the imm[11:0] range. The returned tuple carries an
+ * optional index (`REG_NONE` for "no index"); rv64 has no indexed loads
+ * or stores even with Zba, so callers must have already folded any index
+ * away (load/store do this via rv_fold_indexed). OPK_GLOBAL is not
+ * handled here — its callers emit AUIPC + an LO12 reloc on the load/store
+ * directly. */
+RvAddrMode addr_mode(CGTarget* t, Operand addr, u32 tmp_reg) {
RImpl* a = impl_of(t);
+ RvAddrMode am = {0};
+ am.index = REG_NONE;
if (addr.kind == OPK_LOCAL) {
RvSlot* s = rv64_slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 addr_base: bad slot");
+ if (!s) compiler_panic(t->c, a->loc, "rv64 addr_mode: bad slot");
i32 off = -(i32)s->off;
if (off >= -2048 && off <= 2047) {
- *out_off = off;
- return RV_S0;
+ am.base = RV_S0;
+ am.ofs = off;
+ return am;
}
rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)off);
rv64_emit32(t->mc, rv_add(tmp_reg, RV_S0, tmp_reg));
- *out_off = 0;
- return tmp_reg;
+ am.base = tmp_reg;
+ am.ofs = 0;
+ return am;
}
if (addr.kind == OPK_INDIRECT) {
+ /* This helper does not encode an index — rv64 has no indexed
+ * load/store even with Zba. Load/store fold the index via
+ * rv_fold_indexed before calling here; all other paths take
+ * pointer-only operands. */
+ if (addr.v.ind.index != REG_NONE) {
+ compiler_panic(t->c, a->loc,
+ "rv64 addr_mode: indexed addressing not supported here "
+ "(caller must fold via rv_fold_indexed)");
+ }
i32 off = addr.v.ind.ofs;
u32 base = addr.v.ind.base & 0x1f;
if (off >= -2048 && off <= 2047) {
- *out_off = off;
- return base;
+ am.base = base;
+ am.ofs = off;
+ return am;
}
rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)off);
rv64_emit32(t->mc, rv_add(tmp_reg, base, tmp_reg));
- *out_off = 0;
- return tmp_reg;
+ am.base = tmp_reg;
+ am.ofs = 0;
+ return am;
}
- compiler_panic(t->c, a->loc, "rv64 addr_base: kind %d unsupported",
+ compiler_panic(t->c, a->loc, "rv64 addr_mode: kind %d unsupported",
(int)addr.kind);
}
@@ -219,6 +238,33 @@ void rv64_emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) {
rv64_emit32(mc, rv_add(rd, base, RV_T1));
}
+/* Fold an indexed OPK_INDIRECT into a plain base+disp operand.
+ *
+ * Emits one Zba `sh{1,2,3}add` (or a plain `add` when log2_scale == 0) that
+ * leaves `base + (index << log2_scale)` in `scratch`, and returns
+ * OPK_INDIRECT(scratch, ofs) with `index = REG_NONE`. When the input has no
+ * index (or is not OPK_INDIRECT) the operand is returned unchanged. Zba is
+ * assumed available on rv64 targets — no feature gate.
+ *
+ * Out-of-range displacements are folded here as well. rv_load/rv_store pass
+ * the same register as both the fold scratch and addr_mode's `tmp_reg`; if
+ * the displacement did not fit imm[11:0], addr_mode would load the offset
+ * into that register and overwrite the address just folded into it. So when
+ * `ofs` is outside [-2048, 2047] this helper emits
+ *     scratch = ofs; scratch += base; scratch += index << log2_scale
+ * and returns `ofs = 0`. That sequence writes `scratch` before reading
+ * base/index, so `scratch` must not alias either of them on that path. */
+static Operand rv_fold_indexed(CGTarget* t, Operand addr, u32 scratch) {
+  if (addr.kind != OPK_INDIRECT || addr.v.ind.index == REG_NONE) return addr;
+  u32 base = addr.v.ind.base & 0x1fu;
+  u32 index = addr.v.ind.index & 0x1fu;
+  u8 s = addr.v.ind.log2_scale;
+  i32 ofs = addr.v.ind.ofs;
+  MCEmitter* mc = t->mc;
+
+  if (ofs < -2048 || ofs > 2047) {
+    if (scratch == base || scratch == index) {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "rv64 rv_fold_indexed: scratch aliases base/index with "
+                     "out-of-range offset");
+    }
+    /* Pre-add the displacement to the base so the caller sees ofs == 0. */
+    rv64_emit_load_imm(mc, 1, scratch, (i64)ofs);
+    rv64_emit32(mc, rv_add(scratch, base, scratch));
+    base = scratch;
+    ofs = 0;
+  }
+
+  /* sh{1,2,3}add rd, rs1, rs2 = (rs1 << s) + rs2, so rs1=index, rs2=base. */
+  switch (s) {
+    case 0: rv64_emit32(mc, rv_add   (scratch, base,  index)); break;
+    case 1: rv64_emit32(mc, rv_sh1add(scratch, index, base )); break;
+    case 2: rv64_emit32(mc, rv_sh2add(scratch, index, base )); break;
+    case 3: rv64_emit32(mc, rv_sh3add(scratch, index, base )); break;
+    default: compiler_panic(t->c, impl_of(t)->loc,
+                            "rv64 rv_fold_indexed: bad log2_scale %u",
+                            (u32)s);
+  }
+  addr.v.ind.base = scratch;
+  addr.v.ind.index = REG_NONE;
+  addr.v.ind.log2_scale = 0;
+  addr.v.ind.ofs = ofs;
+  return addr;
+}
+
void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
MCEmitter* mc = t->mc;
@@ -259,14 +305,16 @@ void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
return;
}
- i32 off;
- u32 base = addr_base(t, addr, &off, RV_T0);
+ /* Fold any index via Zba sh{1,2,3}add into RV_T0 first; addr_mode then
+ * sees a plain base+disp. */
+ addr = rv_fold_indexed(t, addr, RV_T0);
+ RvAddrMode am = addr_mode(t, addr, RV_T0);
if (dst.cls == RC_FP) {
- if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), base, off));
- else rv64_emit32(mc, rv_flw(reg_num(dst), base, off));
+ if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), am.base, am.ofs));
+ else rv64_emit32(mc, rv_flw(reg_num(dst), am.base, am.ofs));
} else {
int sx = type_is_signed(addr.type);
- rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), base, off));
+ rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), am.base, am.ofs));
}
}
@@ -318,20 +366,24 @@ void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
return;
}
- i32 off;
- u32 base = addr_base(t, addr, &off,
- (src.kind == OPK_IMM) ? RV_T1 : RV_T0);
+ /* Fold any index into a scratch via Zba sh{1,2,3}add. RV_T0 stays free
+ * for the IMM-src temporary in the OPK_IMM branch below, so route the
+ * fold scratch to RV_T1 in that case; the index-fold scratch matches
+ * addr_mode's tmp_reg. */
+ u32 addr_tmp = (src.kind == OPK_IMM) ? RV_T1 : RV_T0;
+ addr = rv_fold_indexed(t, addr, addr_tmp);
+ RvAddrMode am = addr_mode(t, addr, addr_tmp);
if (src.kind == OPK_IMM) {
u32 sf = (sz == 8) ? 1u : 0u;
rv64_emit_load_imm(mc, sf, RV_T0, src.v.imm);
- rv64_emit32(mc, enc_int_store(sz, RV_T0, base, off));
+ rv64_emit32(mc, enc_int_store(sz, RV_T0, am.base, am.ofs));
return;
}
if (src.cls == RC_FP) {
- if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(src), base, off));
- else rv64_emit32(mc, rv_fsw(reg_num(src), base, off));
+ if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(src), am.base, am.ofs));
+ else rv64_emit32(mc, rv_fsw(reg_num(src), am.base, am.ofs));
} else {
- rv64_emit32(mc, enc_int_store(sz, reg_num(src), base, off));
+ rv64_emit32(mc, enc_int_store(sz, reg_num(src), am.base, am.ofs));
}
}
@@ -352,6 +404,10 @@ static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) {
return;
}
if (lv.kind == OPK_INDIRECT) {
+ if (lv.v.ind.index != REG_NONE) {
+ compiler_panic(t->c, a->loc,
+ "rv64 addr_of: indexed INDIRECT not supported");
+ }
i32 ofs = lv.v.ind.ofs;
u32 base = lv.v.ind.base & 0x1f;
if (ofs >= -2048 && ofs <= 2047) {
@@ -838,6 +894,8 @@ static Operand rv_call_stack_arg_addr(CGTarget* t, u32 stack_offset,
addr.kind = OPK_INDIRECT;
addr.cls = RC_INT;
addr.v.ind.base = tail && !a->omit_frame ? RV_S0 : RV_SP;
+ addr.v.ind.index = REG_NONE;
+ addr.v.ind.log2_scale = 0;
addr.v.ind.ofs = (i32)stack_offset;
if (tail && !a->omit_frame) {
addr.v.ind.ofs += 16 + (a->is_variadic ? 64 : 0);
@@ -895,6 +953,8 @@ static Operand rv_offset_mem_operand(CGTarget* t, Operand op, u32 offset) {
if (!s) compiler_panic(t->c, a->loc, "rv64 offset operand: bad slot");
op.kind = OPK_INDIRECT;
op.v.ind.base = RV_S0;
+ op.v.ind.index = REG_NONE;
+ op.v.ind.log2_scale = 0;
op.v.ind.ofs = -(i32)s->off + (i32)offset;
}
return op;
@@ -961,6 +1021,10 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
rv64_emit32(mc, rv_add(dst_reg, RV_S0, dst_reg));
}
} else if (av->storage.kind == OPK_INDIRECT) {
+ if (av->storage.v.ind.index != REG_NONE) {
+ compiler_panic(t->c, a->loc,
+ "rv64 call byval: indexed storage not supported");
+ }
u32 base = av->storage.v.ind.base & 0x1fu;
i32 off = av->storage.v.ind.ofs;
if (off >= -2048 && off <= 2047) {
@@ -1395,6 +1459,8 @@ static Operand rv_call_plan_offset_operand(CGTarget* t, Operand op,
if (!s) compiler_panic(t->c, a->loc, "rv64 call plan: bad slot");
op.kind = OPK_INDIRECT;
op.v.ind.base = RV_S0;
+ op.v.ind.index = REG_NONE;
+ op.v.ind.log2_scale = 0;
op.v.ind.ofs = -(i32)s->off + (i32)offset;
}
return op;
@@ -1476,6 +1542,10 @@ static void rv_ret(CGTarget* t, const CGABIValue* val) {
src_base_off = -(i32)s->off;
nbytes = s->size;
} else if (val->storage.kind == OPK_INDIRECT) {
+ if (val->storage.v.ind.index != REG_NONE) {
+ compiler_panic(t->c, a->loc,
+ "rv64 ret indirect: indexed storage not supported");
+ }
src_base = val->storage.v.ind.base & 0x1fu;
src_base_off = val->storage.v.ind.ofs;
nbytes = val->size;
@@ -1685,10 +1755,10 @@ static void rv_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
if (addr.kind == OPK_REG) {
base = reg_num(addr);
} else if (addr.kind == OPK_LOCAL) {
- i32 off;
- base = addr_base(t, addr, &off, RV_T0);
- if (off) {
- rv64_emit32(mc, rv_addi(RV_T0, base, off));
+ RvAddrMode am = addr_mode(t, addr, RV_T0);
+ base = am.base;
+ if (am.ofs) {
+ rv64_emit32(mc, rv_addi(RV_T0, base, am.ofs));
base = RV_T0;
}
} else {
@@ -1722,9 +1792,9 @@ static void rv_atomic_store(CGTarget* t, Operand addr, Operand src,
if (addr.kind == OPK_REG) {
base = reg_num(addr);
} else if (addr.kind == OPK_LOCAL) {
- i32 off;
- base = addr_base(t, addr, &off, RV_T0);
- if (off) { rv64_emit32(mc, rv_addi(RV_T0, base, off)); base = RV_T0; }
+ RvAddrMode am = addr_mode(t, addr, RV_T0);
+ base = am.base;
+ if (am.ofs) { rv64_emit32(mc, rv_addi(RV_T0, base, am.ofs)); base = RV_T0; }
} else {
compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: addr kind %d NYI",
(int)addr.kind);
@@ -1747,10 +1817,9 @@ static void rv_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
if (addr.kind == OPK_REG) {
rv64_emit32(mc, rv_addi(base, reg_num(addr), 0));
} else if (addr.kind == OPK_LOCAL) {
- i32 off;
- u32 b = addr_base(t, addr, &off, RV_T0);
- if (b != RV_T0 || off) {
- rv64_emit32(mc, rv_addi(base, b, off));
+ RvAddrMode am = addr_mode(t, addr, RV_T0);
+ if (am.base != RV_T0 || am.ofs) {
+ rv64_emit32(mc, rv_addi(base, am.base, am.ofs));
}
} else {
compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: addr NYI");
@@ -1799,8 +1868,8 @@ static void rv_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
u32 base = RV_T0;
if (addr.kind == OPK_REG) rv64_emit32(mc, rv_addi(base, reg_num(addr), 0));
else if (addr.kind == OPK_LOCAL) {
- i32 off; u32 b = addr_base(t, addr, &off, RV_T0);
- if (b != RV_T0 || off) rv64_emit32(mc, rv_addi(base, b, off));
+ RvAddrMode am = addr_mode(t, addr, RV_T0);
+ if (am.base != RV_T0 || am.ofs) rv64_emit32(mc, rv_addi(base, am.base, am.ofs));
} else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_cas: addr NYI");
u32 ereg = RV_T1, dreg = RV_T2;
if (exp.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, ereg, exp.v.imm);
diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c
@@ -1421,6 +1421,8 @@ static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) {
op = (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout];
if (form == X64_FORM_A) {
if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand");
+ if (op->v.ind.index != REG_NONE)
+ inline_panic(a, "inline asm: indexed addressing not supported");
render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
return;
}
@@ -1448,6 +1450,8 @@ static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) {
return;
}
if (op->kind == OPK_INDIRECT) {
+ if (op->v.ind.index != REG_NONE)
+ inline_panic(a, "inline asm: indexed addressing not supported");
render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
return;
}
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -246,6 +246,81 @@ void emit_lea(MCEmitter *mc, u32 dst, u32 base, i32 disp) {
debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
}
+/* Common low-level emit for `[base + index<<log2_scale + disp]` GPR/SSE
+ * memory operands. Emits, in order: `prefix` (if nonzero), REX built with
+ * the index, the opcode, then ModR/M+SIB+disp, all packed by hand (the
+ * existing `*_pack` helpers use `x64_pack_mem`, which forces SIB index = 4). */
+static void emit_mem_idx_op(MCEmitter *mc, u8 prefix, int w, int force_rex,
+ u8 opc0, u8 opc1, u32 reg, u32 base, u32 index,
+ u32 log2_scale, i32 disp) {
+ u8 buf[16];
+ u32 n = 0; /* bytes staged in buf so far */
+ if (prefix) buf[n++] = prefix; /* 0 means "no prefix" */
+ if (force_rex) /* caller-selected: use the _force REX packer */
+ n += x64_pack_rex_force(buf + n, w, reg, index, base);
+ else
+ n += x64_pack_rex(buf + n, w, reg, index, base);
+ if (opc1) { /* nonzero opc1: two-byte opcode, opc0 is not emitted */
+ buf[n++] = X64_OPC_TWOBYTE;
+ buf[n++] = opc1;
+ } else { /* single-byte opcode */
+ buf[n++] = opc0;
+ }
+ n += x64_pack_mem_sib(buf + n, reg, base, index, log2_scale, disp);
+ mc->emit_bytes(mc, buf, n); /* whole instruction goes out in one call */
+}
+
+/* dst = load of size 1/2/4/8 from [base + index<<log2_scale + disp]. */
+void emit_mov_load_idx(MCEmitter *mc, u32 size, int signed_ext, u32 dst,
+ u32 base, u32 index, u32 log2_scale, i32 disp) {
+ if (index == REG_NONE) { /* no index: delegate to the plain [base + disp] form */
+ emit_mov_load(mc, size, signed_ext, dst, base, disp);
+ return;
+ }
+ u32 ofs = obj_pos(mc->obj, mc->section_id); /* start position for the debug row */
+ if (size == 8) { /* w=1 mov; signed_ext is not consulted */
+ emit_mem_idx_op(mc, 0, 1, 0, X64_OPC_MOV_R_RM, 0, dst, base, index & 0xFu,
+ log2_scale, disp);
+ } else if (size == 4) { /* w=0 mov; signed_ext is not consulted */
+ emit_mem_idx_op(mc, 0, 0, 0, X64_OPC_MOV_R_RM, 0, dst, base, index & 0xFu,
+ log2_scale, disp);
+ } else if (size == 2) { /* two-byte opcode: MOVSX_W or MOVZX_W per signed_ext */
+ emit_mem_idx_op(mc, 0, 0, 0, 0,
+ signed_ext ? X64_OPC_MOVSX_W : X64_OPC_MOVZX_W, dst, base,
+ index & 0xFu, log2_scale, disp);
+ } else if (size == 1) { /* two-byte opcode: MOVSX_B or MOVZX_B per signed_ext */
+ emit_mem_idx_op(mc, 0, 0, 0, 0,
+ signed_ext ? X64_OPC_MOVSX_B : X64_OPC_MOVZX_B, dst, base,
+ index & 0xFu, log2_scale, disp);
+ } /* any other size emits no instruction bytes */
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+
+/* Store the low 1/2/4/8 bytes of src to [base + index<<log2_scale + disp]. */
+void emit_mov_store_idx(MCEmitter *mc, u32 size, u32 src, u32 base, u32 index,
+ u32 log2_scale, i32 disp) {
+ if (index == REG_NONE) { /* no index: delegate to the plain [base + disp] form */
+ emit_mov_store(mc, size, src, base, disp);
+ return;
+ }
+ u32 ofs = obj_pos(mc->obj, mc->section_id); /* start position for the debug row */
+ if (size == 8) { /* w=1 */
+ emit_mem_idx_op(mc, 0, 1, 0, X64_OPC_MOV_RM_R, 0, src, base, index & 0xFu,
+ log2_scale, disp);
+ } else if (size == 4) { /* w=0, no prefix */
+ emit_mem_idx_op(mc, 0, 0, 0, X64_OPC_MOV_RM_R, 0, src, base, index & 0xFu,
+ log2_scale, disp);
+ } else if (size == 2) { /* same opcode as size 4, behind X64_OPSIZE_PFX */
+ emit_mem_idx_op(mc, X64_OPSIZE_PFX, 0, 0, X64_OPC_MOV_RM_R, 0, src, base,
+ index & 0xFu, log2_scale, disp);
+ } else if (size == 1) {
+ /* Force REX so SIL/DIL/etc are addressable as byte regs. */
+ emit_mem_idx_op(mc, 0, 0, 1, X64_OPC_MOV_RM_R8, 0, src, base, index & 0xFu,
+ log2_scale, disp);
+ } /* any other size emits no instruction bytes */
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+
/* movabs reg, imm64 (REX.W + B8+r imm64) for is64; mov r32, imm32 (B8+r
* imm32) for !is64. Both 10/5 bytes. */
void x64_emit_load_imm(MCEmitter *mc, int is64, u32 dst, i64 imm) {
@@ -500,6 +575,28 @@ void emit_sse_store(MCEmitter *mc, u8 prefix, u8 opcode, u32 src, u32 base,
if (mc->debug)
debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
}
+void emit_sse_load_idx(MCEmitter *mc, u8 prefix, u8 opcode, u32 dst, u32 base,
+ u32 index, u32 log2_scale, i32 disp) { /* SSE load, indexed EA */
+ if (index == REG_NONE) { /* no index: delegate to the plain [base + disp] form */
+ emit_sse_load(mc, prefix, opcode, dst, base, disp);
+ return;
+ }
+ u32 ofs = obj_pos(mc->obj, mc->section_id); /* start position for the debug row */
+ emit_mem_idx_op(mc, prefix, 0, 0, 0, opcode, dst, base, index & 0xFu,
+ log2_scale, disp); /* opcode rides in the opc1 (two-byte) slot */
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+void emit_sse_store_idx(MCEmitter *mc, u8 prefix, u8 opcode, u32 src, u32 base,
+ u32 index, u32 log2_scale, i32 disp) { /* SSE store, indexed EA */
+ if (index == REG_NONE) { /* no index: delegate to the plain [base + disp] form */
+ emit_sse_store(mc, prefix, opcode, src, base, disp);
+ return;
+ }
+ u32 ofs = obj_pos(mc->obj, mc->section_id); /* start position for the debug row */
+ emit_mem_idx_op(mc, prefix, 0, 0, 0, opcode, src, base, index & 0xFu,
+ log2_scale, disp); /* opcode rides in the opc1 (two-byte) slot */
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst,
u32 src) {
u32 ofs = obj_pos(mc->obj, mc->section_id);
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -244,6 +244,12 @@ void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, u32 base,
i32 disp);
void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp);
void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp);
+/* Indexed-addressing variants: [base + index<<log2_scale + disp]. Pass
+ * index = REG_NONE to fall back to the plain [base + disp] encoding. */
+void emit_mov_load_idx(MCEmitter* mc, u32 size, int signed_ext, u32 dst,
+ u32 base, u32 index, u32 log2_scale, i32 disp);
+void emit_mov_store_idx(MCEmitter* mc, u32 size, u32 src, u32 base, u32 index,
+ u32 log2_scale, i32 disp);
void emit_ret(MCEmitter* mc);
void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm);
void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src);
@@ -270,6 +276,10 @@ void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base,
i32 disp);
void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base,
i32 disp);
+void emit_sse_load_idx(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base,
+ u32 index, u32 log2_scale, i32 disp);
+void emit_sse_store_idx(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base,
+ u32 index, u32 log2_scale, i32 disp);
void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
u32 src);
diff --git a/src/arch/x64/isa.h b/src/arch/x64/isa.h
@@ -431,6 +431,38 @@ static inline u32 x64_pack_mem(u8* out, u32 reg, u32 base, i32 disp) {
return n;
}
+/* Pack a SIB-form memory operand `[base + index*scale + disp]` into `out` and
+ * return the number of bytes written (ModR/M, SIB, then 0/1/4 disp bytes).
+ * SIB is emitted unconditionally; pass index = 4 (RSP) for the no-index case
+ * (the SIB "no index" encoding). `log2_scale` ∈ {0,1,2,3} for byte scale
+ * 1/2/4/8; only its low two bits are used. RBP/R13 base needs at least disp8
+ * even when disp == 0 (mod=00 with SIB base=5 means "no base, disp32 only").
+ * RSP/R12 base requires SIB regardless — which is what this helper provides. */
+static inline u32 x64_pack_mem_sib(u8* out, u32 reg, u32 base, u32 index,
+ u32 log2_scale, i32 disp) {
+ /* For SIB base encoding, base=5 (RBP/R13) cannot use mod=0; force
+ * disp8/disp32. Other bases can use the standard mod selection. */
+ u32 m;
+ if ((base & 7u) == 5u && disp == 0) {
+ m = 1u; /* disp8 = 0 */
+ } else if (disp == 0) {
+ m = 0u; /* no displacement bytes */
+ } else if (disp >= -128 && disp <= 127) {
+ m = 1u; /* disp8 */
+ } else {
+ m = 2u; /* disp32 */
+ }
+ u32 n = 0;
+ out[n++] = x64_modrm(m, reg, 4u); /* r/m = 4 → SIB follows */
+ out[n++] = x64_sib(log2_scale & 3u, index, base);
+ if (m == 1u) {
+ out[n++] = (u8)(i8)disp; /* range-checked above, so the cast is lossless */
+ } else if (m == 2u) {
+ n += x64_put_u32le(out + n, (u32)disp);
+ }
+ return n;
+}
+
/* Pack a reg-form ModR/M (mod=3) — one byte. */
static inline u32 x64_pack_rm_reg(u8* out, u32 reg, u32 rm) {
out[0] = x64_modrm(3u, reg, rm);
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -106,22 +106,47 @@ static void x_copy(CGTarget* t, Operand dst, Operand src) {
emit_mov_rr(t->mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu);
}
-static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
+/* Resolve an addr operand to the full effective-address tuple
+ * (base, index, log2_scale, ofs). `OPK_LOCAL` resolves to its RBP-relative
+ * slot offset with no index. `OPK_INDIRECT` carries the EA verbatim:
+ * `index == REG_NONE` for plain base+disp, otherwise the SIB scaled-index
+ * form (`log2_scale ∈ {0,1,2,3}` for byte scale 1/2/4/8). */
+static u32 addr_mode(CGTarget* t, Operand addr, u32* out_index,
+ u32* out_log2_scale, i32* out_off) {
XImpl* a = impl_of(t);
if (addr.kind == OPK_LOCAL) {
XSlot* s = x64_slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "x64 addr_base: bad slot");
+ if (!s) compiler_panic(t->c, a->loc, "x64 addr_mode: bad slot");
+ *out_index = REG_NONE;
+ *out_log2_scale = 0;
*out_off = -(i32)s->off;
return X64_RBP;
}
if (addr.kind == OPK_INDIRECT) {
+ *out_index = (addr.v.ind.index == REG_NONE) ? REG_NONE
+ : (addr.v.ind.index & 0xFu);
+ *out_log2_scale = addr.v.ind.log2_scale;
*out_off = addr.v.ind.ofs;
return addr.v.ind.base & 0xFu;
}
- compiler_panic(t->c, a->loc, "x64 addr_base: kind %d unsupported",
+ compiler_panic(t->c, a->loc, "x64 addr_mode: kind %d unsupported",
(int)addr.kind);
}
+/* Plain base+disp accessor for non-load/store paths (atomics, calls,
+ * spill/reload, copy_bytes/set_bytes, inline asm). Returns the base register
+ * and stores the displacement in *out_off. Per the EA contract those paths
+ * never carry an index, so an indexed operand panics here at the boundary. */
+static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
+ u32 idx, ls; /* index / log2_scale outputs of addr_mode; ls is not used here */
+ u32 base = addr_mode(t, addr, &idx, &ls, out_off);
+ if (idx != REG_NONE) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "x64 addr_base: indexed addr in non-load/store path");
+ }
+ return base;
+}
+
static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
return obj_symbol_extern_via_got(t->c, t->obj, sym);
}
@@ -297,13 +322,15 @@ void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
}
i32 off;
- u32 base = addr_base(t, addr, &off);
+ u32 idx, ls;
+ u32 base = addr_mode(t, addr, &idx, &ls, &off);
if (dst.cls == RC_FP) {
u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
- emit_sse_load(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, base, off);
+ emit_sse_load_idx(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, base, idx, ls,
+ off);
} else {
int signed_ = type_is_signed(ma.type ? ma.type : addr.type);
- emit_mov_load(t->mc, sz, signed_, dst.v.reg & 0xFu, base, off);
+ emit_mov_load_idx(t->mc, sz, signed_, dst.v.reg & 0xFu, base, idx, ls, off);
}
}
@@ -348,20 +375,22 @@ void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
}
i32 off;
- u32 base = addr_base(t, addr, &off);
+ u32 idx, ls;
+ u32 base = addr_mode(t, addr, &idx, &ls, &off);
if (src.kind == OPK_IMM) {
int w = (sz == 8) ? 1 : 0;
x64_emit_load_imm(t->mc, w, X64_RAX, src.v.imm);
- emit_mov_store(t->mc, sz, X64_RAX, base, off);
+ emit_mov_store_idx(t->mc, sz, X64_RAX, base, idx, ls, off);
return;
}
if (src.cls == RC_FP) {
u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
- emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, off);
+ emit_sse_store_idx(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, idx, ls,
+ off);
return;
}
- emit_mov_store(t->mc, sz, src.v.reg & 0xFu, base, off);
+ emit_mov_store_idx(t->mc, sz, src.v.reg & 0xFu, base, idx, ls, off);
}
static void x_addr_of(CGTarget* t, Operand dst, Operand lv) {
@@ -373,6 +402,9 @@ static void x_addr_of(CGTarget* t, Operand dst, Operand lv) {
return;
}
if (lv.kind == OPK_INDIRECT) {
+ if (lv.v.ind.index != REG_NONE) {
+ x_panic(t, "addr_of: indexed INDIRECT lvalue (cg should fold)");
+ }
emit_lea(t->mc, dst.v.reg & 0xFu, lv.v.ind.base & 0xFu, lv.v.ind.ofs);
return;
}
@@ -1102,6 +1134,8 @@ static Operand x_call_stack_arg_addr(CGTarget* t, u32 stack_offset, int tail) {
addr.kind = OPK_INDIRECT;
addr.cls = RC_INT;
addr.v.ind.base = tail && !a->omit_frame ? X64_RBP : X64_RSP;
+ addr.v.ind.index = REG_NONE;
+ addr.v.ind.log2_scale = 0;
addr.v.ind.ofs = (i32)stack_offset + (tail ? 8 : 0);
if (tail && !a->omit_frame) addr.v.ind.ofs = 16 + (i32)stack_offset;
return addr;
diff --git a/src/cg/arith.c b/src/cg/arith.c
@@ -357,14 +357,26 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) {
ma.type = i64_ty;
ma.size = 8;
ma.align = 8;
- if (sz < 8) {
+ if (sz < 8 && ck == CV_SEXT) {
low_tmp = api_alloc_reg_or_spill(g, RC_INT, i64_ty);
low = api_op_reg(low_tmp, i64_ty);
- T->convert(T, ck == CV_SEXT ? CV_SEXT : CV_ZEXT, low, src);
+ T->convert(T, CV_SEXT, low, src);
src_ty = i64_ty;
} else {
low.type = i64_ty;
}
+ if (ck != CV_SEXT && !g->c->target.big_endian) {
+ T->store(T, dst_lv, low, ma);
+ if (low_tmp != REG_NONE) api_free_reg(g, low_tmp, RC_INT);
+ api_release(g, &v);
+ ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
+ base = api_op_reg(ar, ptr_ty);
+ T->addr_of(T, base, dst_lv);
+ T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma);
+ api_free_reg(g, ar, RC_INT);
+ api_push(g, api_make_lv(dst_lv, dty));
+ return;
+ }
ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
base = api_op_reg(ar, ptr_ty);
T->addr_of(T, base, dst_lv);
@@ -397,6 +409,9 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) {
T->load(T, dst, lv.op, api_mem_for_lvalue(g, &lv.op, dty));
} else if (v.op.kind == OPK_IMM) {
T->load_imm(T, dst, v.op.v.imm);
+ } else if (v.op.kind == OPK_REG) {
+ Operand src_addr = api_op_indirect(v.op.v.reg, 0, dty);
+ T->load(T, dst, src_addr, api_mem_for_lvalue(g, &src_addr, dty));
} else {
compiler_panic(g->c, g->cur_loc,
"CfreeCg: unsupported i128 truncation source");
diff --git a/src/cg/call.c b/src/cg/call.c
@@ -30,6 +30,9 @@ void api_pack_call_arg(CfreeCg* g, CGABIValue* av, CfreeCgTypeId fty,
} else if (cg_type_is_aggregate(g->c, aty)) {
api_ensure_reg(g, &arg);
Operand st = arg.op;
+ if (!api_is_lvalue_sv(&arg) && st.kind == OPK_REG) {
+ st = api_op_indirect(st.v.reg, 0, aty);
+ }
st.type = aty;
av->storage = st;
av->size = abi_cg_sizeof(g->c->abi, aty);
@@ -335,7 +338,11 @@ void cfree_cg_ret(CfreeCg* g) {
av.abi = &g->fn_abi->ret;
int is_aggregate = cg_type_is_aggregate(g->c, rty);
if (is_aggregate) {
+ api_ensure_reg(g, &v);
av.storage = v.op;
+ if (!api_is_lvalue_sv(&v) && av.storage.kind == OPK_REG) {
+ av.storage = api_op_indirect(av.storage.v.reg, 0, rty);
+ }
av.storage.type = rty;
av.size = abi_cg_sizeof(g->c->abi, rty);
T->ret(T, &av);
diff --git a/src/cg/control.c b/src/cg/control.c
@@ -258,14 +258,19 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
decl.as.object.flags = CFREE_CG_OBJ_READONLY;
api_remember_sym(g, table_sym, arr_ty, decl);
- /* 6. Compute &table[idx] and load the label address. */
- cfree_cg_push_symbol_lvalue(g, (CfreeCgSym)table_sym, 0); /* [idx, table_lv] */
- cfree_cg_swap(g); /* [table_lv, idx] */
- cfree_cg_index(g, 0); /* [&table[idx]] */
+ /* 6. Load table[idx]: push the table's address, swap so the index is on top,
+ * then load with EA scale = pointer size. */
+ cfree_cg_push_symbol_addr(g, (CfreeCgSym)table_sym, 0); /* [idx, &table] */
+ cfree_cg_swap(g); /* [&table, idx] */
memset(&acc, 0, sizeof acc);
acc.type = void_ptr_ty;
acc.align = (uint32_t)c->target.ptr_align;
- cfree_cg_load(g, acc); /* [label_addr] */
+ {
+ CfreeCgEffAddr ea;
+ ea.offset = 0;
+ ea.scale = (uint32_t)c->target.ptr_size;
+ cfree_cg_load(g, acc, ea); /* [label_addr] */
+ }
/* 7. Indirect branch with the full closed target set (every case +
* default), so backends doing branch-target hardening (BTI/IBT/CFG)
@@ -792,170 +797,6 @@ void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size,
api_release(g, &dst);
}
-void cfree_cg_index(CfreeCg* g, uint64_t offset) {
- ApiSValue idx, base;
- CGTarget* T;
- CfreeCgTypeId base_ty, base_ptr_ty, elem_ty, idx_ty;
- const CgType* base_info;
- u32 elemsz;
- int free_base_op = 0;
- Operand base_op, idx_op, result;
- Reg rr;
- if (!g) return;
- if (offset > INT64_MAX) {
- compiler_panic(g->c, g->cur_loc, "CfreeCg: index offset too large");
- return;
- }
- T = g->target;
- idx = api_pop(g);
- base = api_pop(g);
- api_ensure_reg(g, &base);
- base_ty = api_sv_type(&base);
- base_info = cg_type_get(g->c, base_ty);
- if (base_info && base_info->kind == CFREE_CG_TYPE_PTR) {
- elem_ty = base_info->ptr.pointee;
- base_ptr_ty = base_ty;
- } else if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY &&
- api_is_lvalue_sv(&base)) {
- elem_ty = base_info->array.elem;
- base_ptr_ty = cg_type_ptr_to(g->c, elem_ty);
- } else {
- compiler_panic(g->c, g->cur_loc,
- "CfreeCg: index base is not a pointer or array lvalue");
- return;
- }
- elemsz = (u32)abi_cg_sizeof(g->c->abi, elem_ty);
- idx_ty = idx.type ? idx.type : idx.op.type;
- if (!idx_ty) idx_ty = builtin_id(CFREE_CG_BUILTIN_I32);
- if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY) {
- rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty);
- base_op = api_op_reg(rr, base_ptr_ty);
- T->addr_of(T, base_op, base.op);
- api_release(g, &base);
- free_base_op = 1;
- } else {
- base_op = api_force_reg(g, &base, base_ptr_ty);
- }
- idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
- rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty);
- result = api_op_reg(rr, base_ptr_ty);
- if (idx_op.kind == OPK_IMM) {
- i64 total_offset = idx_op.v.imm * (i64)elemsz + (i64)offset;
- T->binop(T, BO_IADD, result, base_op,
- api_op_imm(total_offset, base_ptr_ty));
- } else {
- Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty);
- Operand scaled = api_op_reg(sr, idx_ty);
- /* Allocating `scaled` can materialize a delayed index expression into a
- * fresh virtual register under opt. Refresh idx_op so the multiply uses
- * the materialized value, not the pre-materialization source operand. */
- idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
- if (idx.op.kind == OPK_REG) idx_op = idx.op;
- T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty));
- if (offset > 0) {
- T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty));
- }
- T->binop(T, BO_IADD, result, base_op, scaled);
- api_free_reg(g, sr, RC_INT);
- }
- if (free_base_op) api_free_reg(g, base_op.v.reg, RC_INT);
- if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY)
- api_release(g, &base);
- api_release(g, &idx);
- api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty));
-}
-
-void cfree_cg_field(CfreeCg* g, uint32_t field_index) {
- ApiSValue base;
- CGTarget* T;
- CfreeCgTypeId rec_ty;
- CfreeCgTypeId field_ty;
- CfreeCgTypeId rec_ptr_ty;
- const CgType* rec_info;
- const ABIRecordLayout* layout;
- u32 field_offset;
- Operand result;
- Reg rr;
- if (!g) return;
- T = g->target;
- base = api_pop(g);
- api_ensure_reg(g, &base);
- rec_ty = api_sv_type(&base);
- if (!api_is_lvalue_sv(&base)) {
- compiler_panic(g->c, g->cur_loc, "CfreeCg: field base is not an lvalue");
- return;
- }
- layout = abi_cg_record_layout(g->c->abi, rec_ty);
- if (!layout || field_index >= layout->nfields) {
- compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid field index");
- return;
- }
- rec_info = cg_type_get(g->c, rec_ty);
- if (!rec_info || rec_info->kind != CFREE_CG_TYPE_RECORD ||
- field_index >= rec_info->record.nfields) {
- compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid record base");
- return;
- }
- field_ty = rec_info->record.fields[field_index].type;
- rec_ptr_ty = cg_type_ptr_to(g->c, rec_ty);
- field_offset = layout->fields[field_index].offset;
- if (layout->fields[field_index].bit_width != 0 ||
- (rec_info->record.fields[field_index].flags & CFREE_CG_FIELD_BITFIELD) !=
- 0) {
- Operand base_addr;
- ApiSValue sv;
- BitFieldAccess bf;
- if (layout->fields[field_index].bit_width == 0) {
- compiler_panic(g->c, g->cur_loc, "CfreeCg: zero-width bit-field access");
- api_release(g, &base);
- return;
- }
- base_addr = api_lvalue_addr(g, &base, rec_ptr_ty);
- memset(&bf, 0, sizeof bf);
- bf.field_type = field_ty;
- bf.storage = api_mem_for_lvalue(g, &base_addr, field_ty);
- bf.storage.size = layout->fields[field_index].storage_size;
- bf.storage_offset = layout->fields[field_index].offset;
- bf.bit_offset = layout->fields[field_index].bit_offset;
- bf.bit_width = layout->fields[field_index].bit_width;
- bf.signed_ = rec_info->record.fields[field_index].bit_signed != 0;
- sv = api_make_lv(base_addr, field_ty);
- sv.bitfield_lvalue = 1;
- sv.delayed.bitfield = bf;
- api_release(g, &base);
- api_push(g, sv);
- return;
- }
- if (base.op.kind == OPK_GLOBAL) {
- result =
- api_op_global(base.op.v.global.sym,
- base.op.v.global.addend + (i64)field_offset, field_ty);
- api_push(g, api_make_lv(result, field_ty));
- } else if (base.op.kind == OPK_INDIRECT && field_offset <= (u32)INT32_MAX &&
- base.op.v.ind.ofs <= INT32_MAX - (i32)field_offset) {
- result = api_op_indirect(base.op.v.ind.base,
- base.op.v.ind.ofs + (i32)field_offset, field_ty);
- api_push(g, api_make_lv(result, field_ty));
- } else {
- Operand base_addr;
- rr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty);
- base_addr = api_op_reg(rr, rec_ptr_ty);
- T->addr_of(T, base_addr, base.op);
- api_release(g, &base);
- if (field_offset == 0) {
- result = base_addr;
- } else {
- Reg fr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty);
- result = api_op_reg(fr, rec_ptr_ty);
- T->binop(T, BO_IADD, result, base_addr,
- api_op_imm((i64)field_offset, rec_ptr_ty));
- api_free_reg(g, base_addr.v.reg, RC_INT);
- }
- api_push(g,
- api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty));
- }
-}
-
/* ============================================================
* Calls / return
* ============================================================ */
diff --git a/src/cg/data.c b/src/cg/data.c
@@ -615,8 +615,8 @@ void cfree_cg_data_end(CfreeCg* g) {
* time once the wrapped backend's func_begin has set cur_func_sym. The
* helper does not register the symbol with CfreeCg's sym table;
* callers wire its CfreeCg type via api_remember_sym so subsequent
- * cfree_cg_push_symbol_lvalue / cfree_cg_index can address it as an
- * array of pointers. */
+ * cfree_cg_push_symbol_addr + load(scale=ptr_size) can address it as
+ * an array of pointers. */
ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n) {
Compiler* c;
ObjBuilder* ob;
diff --git a/src/cg/internal.h b/src/cg/internal.h
@@ -296,8 +296,6 @@ void cfree_cg_memmove(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access,
CfreeCgMemAccess src_access);
void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size,
CfreeCgMemAccess dst_access);
-void cfree_cg_index(CfreeCg* g, uint64_t offset);
-void cfree_cg_field(CfreeCg* g, uint32_t field_index);
void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym,
CfreeCgDataDefAttrs attrs);
void cfree_cg_data_common(CfreeCg* g, CfreeCgSym cg_sym, uint64_t size,
@@ -321,7 +319,6 @@ void cfree_cg_data_symdiff(CfreeCg* g, CfreeCgSym lhs, CfreeCgSym rhs,
void cfree_cg_data_end(CfreeCg* g);
ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n);
DebugTypeId api_debug_type(CfreeCg* g, CfreeCgTypeId id);
-int api_source_flags_addr_taken(u32 flags);
int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty,
CfreeCgLocalAttrs attrs);
CfreeCgLocal api_local_handle(u32 index);
@@ -345,14 +342,11 @@ void api_push_source_reg_lvalue(CfreeCg* g, CfreeCgLocal local, Reg reg,
void cfree_cg_push_local(CfreeCg* g, CfreeCgLocal local);
void cfree_cg_push_local_addr(CfreeCg* g, CfreeCgLocal local);
void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend);
-void cfree_cg_push_symbol_lvalue(CfreeCg* g, CfreeCgSym sym, int64_t addend);
-void cfree_cg_addr_offset(CfreeCg* g, int64_t byte_offset,
- CfreeCgTypeId result_type);
-void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access);
-void cfree_cg_indirect(CfreeCg* g);
+void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea);
void cfree_cg_addr(CfreeCg* g);
-void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access);
+void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea);
void cfree_cg_dup(CfreeCg* g);
+void cfree_cg_dup2(CfreeCg* g);
void cfree_cg_swap(CfreeCg* g);
void cfree_cg_drop(CfreeCg* g);
int cfree_cg_top_const_int(CfreeCg* g, int64_t* out_value);
@@ -389,6 +383,8 @@ Operand api_op_reg(Reg r, CfreeCgTypeId ty);
Operand api_op_local(FrameSlot s, CfreeCgTypeId ty);
Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty);
Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty);
+Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs,
+ CfreeCgTypeId ty);
u8 api_residency_for(const Operand* o);
ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty);
ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty);
diff --git a/src/cg/local.c b/src/cg/local.c
@@ -1,12 +1,12 @@
#include "cg/internal.h"
-int api_source_flags_addr_taken(u32 flags) {
- return (flags & CFREE_CG_LOCAL_ADDRESS_TAKEN) != 0;
-}
-
int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty,
CfreeCgLocalAttrs attrs) {
- if (api_source_flags_addr_taken(attrs.flags)) return 1;
+ (void)attrs;
+ /* Aggregates (records, arrays), wide16 (f128/i128), vararg state, and any
+ * non-scalar type must live in memory. All scalar locals are placed on the
+ * frame at CG time; opt is responsible for promoting non-escaped scalars
+ * back into PRegs. */
if (api_is_wide16_scalar_type(g->c, ty)) return 1;
return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) ||
cg_type_is_ptr(g->c, ty));
@@ -83,8 +83,6 @@ CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type,
desc.loc = g->cur_loc;
desc.size = abi_cg_sizeof(g->c->abi, type);
desc.align = attrs.align ? attrs.align : abi_cg_alignof(g->c->abi, type);
- if (api_source_flags_addr_taken(attrs.flags))
- desc.flags |= CG_LOCAL_ADDR_TAKEN;
if (api_local_requires_memory(g, ty, attrs))
desc.flags |= CG_LOCAL_MEMORY_REQUIRED;
if (g->target->local)
@@ -134,7 +132,6 @@ CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, CfreeCgTypeId type,
pd.type = ty;
pd.size = size;
pd.align = align;
- if (api_source_flags_addr_taken(attrs.flags)) pd.flags |= CG_LOCAL_ADDR_TAKEN;
if (api_local_requires_memory(g, ty, attrs))
pd.flags |= CG_LOCAL_MEMORY_REQUIRED;
if (g->fn_abi && index < g->fn_abi->nparams) {
diff --git a/src/cg/memory.c b/src/cg/memory.c
@@ -146,170 +146,458 @@ void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend) {
}
}
-void cfree_cg_push_symbol_lvalue(CfreeCg* g, CfreeCgSym sym, int64_t addend) {
- CfreeCgTypeId ty;
- if (!g) return;
- ty = api_sym_type(g, sym);
- if (!ty) return;
- if (api_sym_is_tls(g, sym)) {
- CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
- Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
- Operand dst = api_op_reg(r, ptr_ty);
- g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend);
- api_push(g, api_make_lv(api_op_indirect(r, 0, ty), ty));
- } else {
- api_push(g, api_make_lv(api_op_global((ObjSymId)sym, addend, ty), ty));
+/* ============================================================
+ * Load / addr / store
+ *
+ * The memops dispatch on the TOS shape of `base`:
+ * 1. lvalue base, no index -> fold ea.offset into the operand encoding
+ * (OPK_LOCAL / OPK_GLOBAL / OPK_INDIRECT) and emit a single memop.
+ * 2. lvalue base, scaled index -> take the lvalue's address, then form an
+ * indexed OPK_INDIRECT and emit a single memop.
+ * 3. pointer-rvalue base, no index -> for OPK_GLOBAL fold the offset into
+ * the addend; otherwise use [reg + offset].
+ * 4. pointer-rvalue base, scaled index -> form OPK_INDIRECT against the
+ * pointer register with index/scale.
+ *
+ * Scale normalization (ea.scale != 0):
+ * {1,2,4,8} -> log2_scale 0..3 on OPK_INDIRECT.
+ * otherwise -> compute index*scale into a fresh register, fold into base,
+ * dispatch with index = REG_NONE.
+ * ============================================================ */
+
+/* Byte scale 1/2/4/8 -> log2 0..3; any other scale (including 0) -> -1. */
+static int scale_to_log2(uint32_t scale) {
+ switch (scale) {
+ case 1:
+ return 0;
+ case 2:
+ return 1;
+ case 4:
+ return 2;
+ case 8:
+ return 3;
+ default:
+ return -1;
}
}
-void cfree_cg_addr_offset(CfreeCg* g, int64_t byte_offset,
- CfreeCgTypeId result_type) {
- ApiSValue v;
- CfreeCgTypeId rty;
- CfreeCgTypeId ptr_ty;
- Operand base;
- Operand result;
- Reg rr;
- int want_ptr;
- int base_is_lvalue;
- int free_base = 0;
- if (!g) return;
- rty = resolve_type(g->c, result_type);
- if (!rty) return;
- v = api_pop(g);
- want_ptr = cg_type_is_ptr(g->c, rty);
- base_is_lvalue = api_is_lvalue_sv(&v);
- if (v.source_local != CFREE_CG_LOCAL_NONE)
- api_local_const_clear(api_local_from_handle(g, v.source_local));
- api_ensure_reg(g, &v);
- if (v.op.kind == OPK_GLOBAL) {
- result = api_op_global(v.op.v.global.sym,
- v.op.v.global.addend + byte_offset, rty);
- api_push(g, want_ptr ? api_make_sv(result, rty) : api_make_lv(result, rty));
- return;
+/* Fold the EA into a single operand suitable for the backend load/store.
+ * `addr` may be OPK_LOCAL, OPK_GLOBAL, OPK_INDIRECT, or OPK_REG (a pointer
+ * rvalue). With no index, a zero-offset OPK_LOCAL and any OPK_GLOBAL keep
+ * their kind (the offset joins the global's addend); every other case yields
+ * an OPK_INDIRECT. Indexed globals/locals are first lowered into a register
+ * via T->addr_of and then combined with the index.
+ *
+ * The caller owns `index` (REG_NONE if no index); this helper never frees it.
+ * On return, *out_owned_base is the register this helper allocated, which the
+ * caller must free after the memop completes (REG_NONE if no new register was
+ * allocated).
+ */
+static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset,
+ Reg index, u8 log2_scale,
+ CfreeCgTypeId access_ty,
+ Reg* out_owned_base) {
+ CGTarget* T = g->target;
+ CfreeCgTypeId base_ty =
+ cg_type_is_ptr(g->c, addr.type) ? addr.type
+ : cg_type_ptr_to(g->c, access_ty);
+ *out_owned_base = REG_NONE;
+
+ if (index == REG_NONE) {
+ /* No index: fold offset into the operand directly. */
+ if (addr.kind == OPK_LOCAL) {
+ Operand r;
+ if (offset == 0) {
+ r = addr;
+ r.type = access_ty;
+ return r;
+ }
+ /* Local frame slots have no native displacement encoding in OPK_LOCAL;
+ * materialize the base address into a register and apply the offset. */
+ {
+ Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty);
+ Operand base_reg = api_op_reg(br, base_ty);
+ T->addr_of(T, base_reg, addr);
+ if (offset >= INT32_MIN && offset <= INT32_MAX) {
+ *out_owned_base = br;
+ return api_op_indirect(br, (i32)offset, access_ty);
+ }
+ T->binop(T, BO_IADD, base_reg, base_reg,
+ api_op_imm(offset, base_ty));
+ *out_owned_base = br;
+ return api_op_indirect(br, 0, access_ty);
+ }
+ }
+ if (addr.kind == OPK_GLOBAL) {
+ Operand r = api_op_global(addr.v.global.sym, addr.v.global.addend + offset,
+ access_ty);
+ return r;
+ }
+ if (addr.kind == OPK_INDIRECT) {
+ i64 sum = (i64)addr.v.ind.ofs + offset;
+ if (sum >= INT32_MIN && sum <= INT32_MAX) {
+ return api_op_indirect_indexed(addr.v.ind.base, addr.v.ind.index,
+ addr.v.ind.log2_scale, (i32)sum,
+ access_ty);
+ }
+ /* Offset too large for i32 displacement; materialize. */
+ {
+ Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty);
+ Operand base_reg = api_op_reg(br, base_ty);
+ T->copy(T, base_reg, api_op_reg(addr.v.ind.base, base_ty));
+ T->binop(T, BO_IADD, base_reg, base_reg,
+ api_op_imm(offset, base_ty));
+ *out_owned_base = br;
+ return api_op_indirect_indexed(br, addr.v.ind.index,
+ addr.v.ind.log2_scale,
+ addr.v.ind.ofs, access_ty);
+ }
+ }
+ /* OPK_REG (pointer rvalue) */
+ if (offset >= INT32_MIN && offset <= INT32_MAX) {
+ return api_op_indirect(addr.v.reg, (i32)offset, access_ty);
+ }
+ {
+ Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty);
+ Operand base_reg = api_op_reg(br, base_ty);
+ T->binop(T, BO_IADD, base_reg, api_op_reg(addr.v.reg, base_ty),
+ api_op_imm(offset, base_ty));
+ *out_owned_base = br;
+ return api_op_indirect(br, 0, access_ty);
+ }
}
- if (!want_ptr && v.op.kind == OPK_INDIRECT) {
- i64 ofs = (i64)v.op.v.ind.ofs + byte_offset;
- if (ofs >= INT32_MIN && ofs <= INT32_MAX) {
- result = api_op_indirect(v.op.v.ind.base, (i32)ofs, rty);
- api_push(g, api_make_lv(result, rty));
- return;
+
+ /* Indexed form. addr must be reduced to a base register first when it is
+ * not already an OPK_INDIRECT with a free index slot. */
+ if (addr.kind == OPK_INDIRECT && addr.v.ind.index == REG_NONE &&
+ offset == 0) {
+ /* Reuse existing INDIRECT base; add index and scale. The displacement
+ * stays whatever the operand already had. */
+ return api_op_indirect_indexed(addr.v.ind.base, index, log2_scale,
+ addr.v.ind.ofs, access_ty);
+ }
+ if (addr.kind == OPK_INDIRECT && addr.v.ind.index == REG_NONE) {
+ i64 sum = (i64)addr.v.ind.ofs + offset;
+ if (sum >= INT32_MIN && sum <= INT32_MAX) {
+ return api_op_indirect_indexed(addr.v.ind.base, index, log2_scale,
+ (i32)sum, access_ty);
}
}
- ptr_ty = want_ptr ? rty : cg_type_ptr_to(g->c, rty);
- if (!base_is_lvalue && cg_type_is_ptr(g->c, api_sv_type(&v)))
- ptr_ty = api_sv_type(&v);
- if (base_is_lvalue) {
- base = api_lvalue_addr(g, &v, ptr_ty);
- free_base = 1;
- } else {
- base = api_force_reg(g, &v, ptr_ty);
+ /* Otherwise, materialize addr into a register and then build the indexed
+ * operand around it. */
+ {
+ Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty);
+ Operand base_reg = api_op_reg(br, base_ty);
+ if (addr.kind == OPK_REG) {
+ T->copy(T, base_reg, api_op_reg(addr.v.reg, base_ty));
+ } else {
+ T->addr_of(T, base_reg, addr);
+ }
+ if (offset != 0) {
+ if (offset >= INT32_MIN && offset <= INT32_MAX) {
+ *out_owned_base = br;
+ return api_op_indirect_indexed(br, index, log2_scale, (i32)offset,
+ access_ty);
+ }
+ T->binop(T, BO_IADD, base_reg, base_reg, api_op_imm(offset, base_ty));
+ }
+ *out_owned_base = br;
+ return api_op_indirect_indexed(br, index, log2_scale, 0, access_ty);
}
- rr = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
- result = api_op_reg(rr, ptr_ty);
- g->target->binop(g->target, BO_IADD, result, base,
- api_op_imm(byte_offset, ptr_ty));
- if (free_base) api_free_reg(g, base.v.reg, RC_INT);
- api_release(g, &v);
- if (want_ptr) {
- result.type = rty;
- api_push(g, api_make_sv(result, rty));
+}
+
+/* Pop the index operand for a scaled-index memop. Returns the index in a
+ * freshly allocated register that the caller owns and must free after the
+ * memop. Handles the scale-not-in-{1,2,4,8} case by computing index*scale.
+ *
+ * On return:
+ * *out_log2 = log2(scale) (0..3) when scale is one of {1,2,4,8}; otherwise
+ * 0, and the returned register already holds index * scale.
+ */
+static Reg pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) {
+ ApiSValue idx;
+ CfreeCgTypeId idx_ty;
+ int lg2;
+ Operand idx_op;
+ CGTarget* T = g->target;
+ Reg sr;
+ Operand scaled;
+
+ idx = api_pop(g);
+ idx_ty = api_sv_type(&idx);
+ if (!idx_ty) idx_ty = builtin_id(CFREE_CG_BUILTIN_I64);
+
+ lg2 = scale_to_log2(scale);
+ if (lg2 >= 0) {
+ *out_log2 = (u8)lg2;
+ /* Always allocate a fresh register so the caller has unambiguous
+ * ownership; copy the index value in. */
+ idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
+ sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty);
+ scaled = api_op_reg(sr, idx_ty);
+ if (idx_op.kind == OPK_IMM) {
+ T->load_imm(T, scaled, idx_op.v.imm);
+ } else {
+ /* Re-fetch in case alloc materialized a delayed expression. */
+ idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
+ if (idx.op.kind == OPK_REG) idx_op = idx.op;
+ T->copy(T, scaled, idx_op);
+ }
+ api_release(g, &idx);
+ return sr;
+ }
+
+ /* Non-power-of-two scale: materialize index*scale into a fresh register. */
+ idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
+ sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty);
+ scaled = api_op_reg(sr, idx_ty);
+ if (idx_op.kind == OPK_IMM) {
+ T->load_imm(T, scaled, idx_op.v.imm * (i64)scale);
} else {
- api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, rty), rty));
+ idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
+ if (idx.op.kind == OPK_REG) idx_op = idx.op;
+ T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)scale, idx_ty));
}
+ api_release(g, &idx);
+ *out_log2 = 0;
+ return sr;
}
-/* ============================================================
- * Load / addr / store
- * ============================================================ */
+/* Build a BitFieldAccess descriptor from the CfreeCgMemAccess metadata. */
+static BitFieldAccess bf_from_access(CfreeCg* g, CfreeCgMemAccess access,
+ CfreeCgTypeId field_ty,
+ const Operand* addr) {
+ BitFieldAccess bf;
+ memset(&bf, 0, sizeof bf);
+ bf.field_type = field_ty;
+ bf.storage = api_mem_for_lvalue(g, addr, field_ty);
+ if (access.storage_size) bf.storage.size = access.storage_size;
+ bf.storage_offset = 0;
+ bf.bit_offset = access.bit_offset;
+ bf.bit_width = access.bit_width;
+ bf.signed_ = access.bit_signed ? 1 : 0;
+ return bf;
+}
-void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access) {
- ApiSValue v;
+/* Load through an effective address (base + index * ea.scale + ea.offset).
+ *
+ * Stack on entry: [base] when ea.scale == 0, otherwise [base, index]; the
+ * index is popped first, then the base. The base is either an lvalue or an
+ * rvalue treated as a pointer. When access.bit_width != 0 the access is
+ * issued as a bit-field load; otherwise as a plain load.
+ *
+ * Pushes the loaded value. Several no-index, non-bit-field fast paths
+ * (aggregate access, wide-16 scalars, tracked source-local constants,
+ * register-resident source locals) push a result without issuing a load. */
+
+void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
+ ApiSValue base;
+ CGTarget* T;
CfreeCgTypeId ty;
+ CfreeCgTypeId access_ty;
+ Reg owned_base = REG_NONE;
+ Reg owned_index = REG_NONE;
+ u8 log2_scale = 0;
+ Operand mem_op;
+ Reg dst_r;
Operand dst;
+ int is_lvalue;
+ int is_bitfield;
+ int has_index;
if (!g) return;
+ T = g->target;
if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g);
- v = api_pop(g);
- if (!api_is_lvalue_sv(&v)) {
- api_push(g, v);
- return;
- }
- ty = api_mem_access_type(g, access, api_sv_type(&v), "load");
- if (v.bitfield_lvalue) {
- CfreeCgTypeId load_ty = ty;
- Reg rr;
- api_require_scalar_mem_type(g, "load", load_ty);
- rr = api_alloc_reg_or_spill(g, RC_INT, load_ty);
- dst = api_op_reg(rr, load_ty);
- g->target->bitfield_load(g->target, dst, v.op, v.delayed.bitfield);
- api_release(g, &v);
- api_push(g, api_make_sv(dst, load_ty));
- return;
+
+ has_index = (ea.scale != 0);
+ is_bitfield = (access.bit_width != 0);
+
+ if (has_index) {
+ /* Pop and normalize the index first; it sits between base and any
+ * follow-up value (none for load). */
+ owned_index = pop_and_normalize_index(g, ea.scale, &log2_scale);
}
- if (cg_type_is_aggregate(g->c, api_sv_type(&v))) {
- u32 access_size;
- u32 lvalue_size;
- if (!cg_type_is_aggregate(g->c, ty)) {
- compiler_panic(g->c, g->cur_loc,
- "CfreeCg: load scalar access from aggregate lvalue "
- "requires selecting a field");
+
+ base = api_pop(g);
+ is_lvalue = api_is_lvalue_sv(&base);
+
+ /* Aggregate / non-EA fast paths only apply to the no-index, no-bitfield
+ * case where the result is the lvalue itself (matches old behavior).
+ * Scalar accesses at an offset into an aggregate lvalue are the canonical
+ * field-access pattern under the EA model and fall through to the normal
+ * scalar load path below. */
+ if (!has_index && !is_bitfield && is_lvalue && ea.offset == 0 &&
+ cg_type_is_aggregate(g->c, api_sv_type(&base))) {
+ ty = api_mem_access_type(g, access, api_sv_type(&base), "load");
+ if (cg_type_is_aggregate(g->c, ty)) {
+ u32 access_size = api_mem_type_size(g, ty, "load");
+ u32 lvalue_size = api_mem_type_size(g, api_sv_type(&base), "load");
+ if (access_size != lvalue_size) {
+ compiler_panic(g->c, g->cur_loc,
+ "CfreeCg: load aggregate type/size mismatch: access "
+ "size %u, lvalue size %u",
+ (unsigned)access_size, (unsigned)lvalue_size);
+ }
+ api_push(g, base);
+ return;
}
- access_size = api_mem_type_size(g, ty, "load");
- lvalue_size = api_mem_type_size(g, api_sv_type(&v), "load");
- if (access_size != lvalue_size) {
- compiler_panic(g->c, g->cur_loc,
- "CfreeCg: load aggregate type/size mismatch: access size "
- "%u, lvalue size %u",
- (unsigned)access_size, (unsigned)lvalue_size);
+ /* Scalar access from aggregate lvalue: fall through. */
+ }
+
+ /* From here on, base must reduce to something we can address. */
+ if (!is_lvalue && cg_type_is_aggregate(g->c, api_sv_type(&base))) {
+ /* Pointer rvalue to aggregate without any EA -- return as-is. */
+ if (!has_index && !is_bitfield && ea.offset == 0) {
+ api_push(g, base);
+ return;
}
- api_push(g, v);
- return;
}
- api_require_scalar_mem_type(g, "load", ty);
- if (api_is_wide16_scalar_type(g->c, ty)) {
- v.type = ty;
- v.op.type = ty;
- api_push(g, v);
+
+ ty = api_mem_access_type(g, access, api_sv_type(&base), "load");
+ access_ty = ty;
+
+ if (!has_index && !is_bitfield && !is_lvalue &&
+ base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL &&
+ (cg_type_is_aggregate(g->c, ty) ||
+ api_is_wide16_scalar_type(g->c, ty))) {
+ base.type = ty;
+ base.op.type = ty;
+ base.lvalue = 1;
+ api_push(g, base);
return;
}
- if (v.source_local != CFREE_CG_LOCAL_NONE &&
- api_local_const_load(g, v.source_local, access, &dst)) {
- api_release(g, &v);
+
+ if (!is_bitfield) api_require_scalar_mem_type(g, "load", access_ty);
+
+ /* Source-local constant load (only the plain, no-EA case is tracked). */
+ if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue &&
+ base.source_local != CFREE_CG_LOCAL_NONE &&
+ api_local_const_load(g, base.source_local, access, &dst)) {
+ api_release(g, &base);
api_push(g, api_make_sv(dst, dst.type));
return;
}
- api_ensure_reg(g, &v);
- if (v.source_local != CFREE_CG_LOCAL_NONE && v.op.kind == OPK_REG) {
- dst = v.op;
- dst.type = ty;
- v.op = dst;
- v.type = ty;
- v.lvalue = 0;
- v.res = RES_FIXED_REG;
- api_push(g, v);
+
+ /* Source-local register lvalue (param in a hard reg): plain no-EA load
+ * returns the register value directly. */
+ if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue &&
+ base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_REG) {
+ Operand val = base.op;
+ val.type = ty;
+ base.op = val;
+ base.type = ty;
+ base.lvalue = 0;
+ base.res = RES_FIXED_REG;
+ api_push(g, base);
return;
}
- dst = api_force_reg(g, &v, ty);
- dst.type = ty;
- api_push(g, api_make_sv(dst, ty));
-}
-void cfree_cg_indirect(CfreeCg* g) {
- ApiSValue ptr;
- CfreeCgTypeId pty;
- CfreeCgTypeId pointee;
- Operand ptr_op;
- if (!g) return;
- ptr = api_pop(g);
- pty = api_sv_type(&ptr);
- pointee = cg_type_pointee(g->c, pty);
- if (!pointee || cg_type_is_void(g->c, pointee)) {
- compiler_panic(g->c, g->cur_loc,
- "CfreeCg: indirect operand is not a pointer to object");
+ /* Wide-16 scalar lvalue load: keep the addressable storage as the value.
+ * For fields at a fixed offset, fold the EA into a new lvalue operand instead
+ * of asking the backend for a single 16-byte register load. */
+ if (!has_index && !is_bitfield && is_lvalue &&
+ api_is_wide16_scalar_type(g->c, ty)) {
+ if (ea.offset == 0) {
+ base.type = ty;
+ base.op.type = ty;
+ api_push(g, base);
+ return;
+ }
+ if (!api_operand_can_address(&base.op)) {
+ CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base));
+ Operand addr = api_lvalue_addr(g, &base, pty);
+ mem_op = fold_ea_into_operand(g, addr, ea.offset, REG_NONE, 0, ty,
+ &owned_base);
+ if (owned_base == REG_NONE) owned_base = addr.v.reg;
+ else if (owned_base != addr.v.reg) api_free_reg(g, addr.v.reg, RC_INT);
+ } else {
+ mem_op = fold_ea_into_operand(g, base.op, ea.offset, REG_NONE, 0, ty,
+ &owned_base);
+ }
+ if (mem_op.kind == OPK_INDIRECT && owned_base == REG_NONE &&
+ base.op.kind == OPK_INDIRECT) {
+ base.res = RES_INHERENT;
+ }
+ api_release(g, &base);
+ api_push(g, api_make_lv(mem_op, ty));
return;
}
- ptr_op = api_force_reg(g, &ptr, pty);
- api_push(g, api_make_lv(api_op_indirect(ptr_op.v.reg, 0, pointee), pointee));
+
+ /* Compute the memop operand. Lvalue bases preserve named-storage operands;
+ * pointer rvalues use the register holding the address. */
+ if (is_lvalue) {
+ if (!api_operand_can_address(&base.op)) {
+ /* Source-local in a hard register but we need to compute an EA: take
+ * the lvalue's address first. */
+ CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base));
+ Operand addr = api_lvalue_addr(g, &base, pty);
+ mem_op = fold_ea_into_operand(g, addr, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
+ /* `addr` is an owned register from api_lvalue_addr. */
+ if (owned_base == REG_NONE) owned_base = addr.v.reg;
+ else if (owned_base != addr.v.reg) api_free_reg(g, addr.v.reg, RC_INT);
+ } else {
+ /* The lvalue carries its own operand; fold the EA into it. */
+ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
+ }
+ } else if (base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL) {
+ /* Pointer-rvalue OPK_GLOBAL: fold the EA directly against the global
+ * (matching the lvalue OPK_GLOBAL path) so the backend can emit a single
+ * PC-relative or absolute access. */
+ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
+ } else {
+ /* Pointer rvalue: ensure the address is in a register and treat that as
+ * the base. */
+ CfreeCgTypeId pty = api_sv_type(&base);
+ Operand ptr_op = api_force_reg(g, &base, pty);
+ mem_op = fold_ea_into_operand(g, ptr_op, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
+ }
+
+ /* Mutate source-local tracking. Any EA-shaped load through a tracked local
+ * (offset != 0 or has_index or non-matching access) cannot use the cached
+ * scalar value: clear it. */
+ if (is_lvalue && base.source_local != CFREE_CG_LOCAL_NONE) {
+ api_local_const_clear(api_local_from_handle(g, base.source_local));
+ }
+
+ if (is_bitfield) {
+ BitFieldAccess bf = bf_from_access(g, access, access_ty, &mem_op);
+ Reg rr = api_alloc_reg_or_spill(g, RC_INT, access_ty);
+ dst = api_op_reg(rr, access_ty);
+ T->bitfield_load(T, dst, mem_op, bf);
+ } else {
+ dst_r = api_alloc_reg_or_spill(g, api_type_class(access_ty), access_ty);
+ dst = api_op_reg(dst_r, access_ty);
+ T->load(T, dst, mem_op, api_mem_from_access(g, &mem_op, access));
+ }
+
+ /* Release the base lvalue/rvalue and any owned registers. */
+ if (is_lvalue) {
+ /* If the original lvalue's operand was OPK_INDIRECT, its base register
+ * was owned by the lvalue and is still in mem_op.v.ind.base when we did
+ * not allocate a new owned_base. Free that base when no new owned_base
+ * shadows it. */
+ if (base.op.kind == OPK_INDIRECT && owned_base == REG_NONE) {
+ /* mem_op uses the same base register as base.op; free it via the
+ * lvalue release. */
+ }
+ api_release(g, &base);
+ } else {
+ /* For rvalue-pointer bases, the register holding the pointer was the
+ * owned reg of `base`; api_release will free it unless the EA folding
+ * already absorbed it into mem_op. The fold_ea_into_operand path for
+ * REG returns either OPK_INDIRECT(addr.v.reg, ofs) (no new owned_base)
+ * or a freshly allocated owned_base. In either case api_release(&base)
+ * frees the pointer register; that is fine because we already issued
+ * the memop. */
+ api_release(g, &base);
+ }
+ if (owned_base != REG_NONE) api_free_reg(g, owned_base, RC_INT);
+ if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT);
+
+ api_push(g, api_make_sv(dst, access_ty));
}
void cfree_cg_addr(CfreeCg* g) {
@@ -318,11 +606,6 @@ void cfree_cg_addr(CfreeCg* g) {
Operand dst;
if (!g) return;
v = api_pop(g);
- if (v.bitfield_lvalue) {
- compiler_panic(g->c, g->cur_loc,
- "CfreeCg: cannot take address of bit-field");
- return;
- }
pty = cg_type_ptr_to(g->c, api_sv_type(&v));
if (v.source_local != CFREE_CG_LOCAL_NONE)
api_local_const_address_taken(g, v.source_local);
@@ -331,72 +614,82 @@ void cfree_cg_addr(CfreeCg* g) {
api_push(g, api_make_sv(dst, pty));
}
-void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) {
- ApiSValue lv, rv;
+void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
+ ApiSValue base, rv;
CGTarget* T;
CfreeCgTypeId ty;
+ CfreeCgTypeId access_ty;
Operand src;
+ Reg owned_base = REG_NONE;
+ Reg owned_index = REG_NONE;
+ u8 log2_scale = 0;
+ Operand mem_op;
+ int is_lvalue;
+ int is_bitfield;
+ int has_index;
int scalar_aggregate_store = 0;
if (!g) return;
- if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g);
T = g->target;
+ if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g);
+
+ has_index = (ea.scale != 0);
+ is_bitfield = (access.bit_width != 0);
+
+ /* Stack:
+ * no index: [base, value] - pop value, then index (none), then base
+ * indexed: [base, index, value] - pop value, then index, then base
+ */
rv = api_pop(g);
- lv = api_pop(g);
- if (!api_is_lvalue_sv(&lv)) {
- compiler_panic(g->c, g->cur_loc,
- "CfreeCg: store destination is not an lvalue");
- return;
+ if (has_index) {
+ owned_index = pop_and_normalize_index(g, ea.scale, &log2_scale);
}
- ty = api_mem_access_type(g, access, api_sv_type(&lv), "store");
- if (lv.bitfield_lvalue) {
- api_validate_memory_value(g, "store", ty, api_sv_type(&rv));
- if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL ||
- (access.flags & CFREE_CG_MEM_VOLATILE)) {
- api_local_const_memory_boundary(g);
- }
- if (api_sv_op_is_reg_or_imm(&rv)) {
- src = rv.op;
- } else {
- src = api_force_reg(g, &rv, api_sv_type(&rv));
- }
- T->bitfield_store(T, lv.op, src, lv.delayed.bitfield);
- api_release(g, &lv);
- api_release(g, &rv);
- return;
- }
- if (cg_type_is_aggregate(g->c, api_sv_type(&lv)) &&
- !cg_type_is_aggregate(g->c, api_sv_type(&rv)) &&
- !cg_type_is_aggregate(g->c, ty)) {
- u32 access_size = api_mem_type_size(g, ty, "store");
- u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store");
- u32 value_size = api_mem_type_size(g, api_sv_type(&rv), "store");
- if (access_size != dst_size || value_size != dst_size) {
+ base = api_pop(g);
+ is_lvalue = api_is_lvalue_sv(&base);
+
+ if (!is_lvalue) {
+ /* The "destination is not an lvalue" diagnostic now only fires when the
+ * popped base is neither an lvalue nor a pointer-typed rvalue. */
+ if (!cg_type_is_ptr(g->c, api_sv_type(&base))) {
compiler_panic(g->c, g->cur_loc,
- "CfreeCg: store scalar/aggregate size mismatch: access "
- "size %u, destination size %u, value size %u",
- (unsigned)access_size, (unsigned)dst_size,
- (unsigned)value_size);
+ "CfreeCg: store destination is not an lvalue or pointer");
+ return;
}
- scalar_aggregate_store = 1;
}
- if (!scalar_aggregate_store &&
+
+ ty = api_mem_access_type(g, access, api_sv_type(&base), "store");
+ access_ty = ty;
+
+ /* Aggregate store (no EA): memcpy through src lvalue. Only triggers when
+ * the access type itself is aggregate. Scalar stores at an offset into an
+ * aggregate lvalue are field-stores under the EA model and fall through to
+ * the scalar store path. */
+ (void)scalar_aggregate_store;
+ if (!has_index && !is_bitfield && ea.offset == 0 &&
(cg_type_is_aggregate(g->c, ty) ||
- cg_type_is_aggregate(g->c, api_sv_type(&lv)) ||
cg_type_is_aggregate(g->c, api_sv_type(&rv)))) {
CfreeCgTypeId ptr_ty;
Operand dst_addr, src_addr;
+ int dst_addr_owned;
+ int src_addr_owned;
+ int src_ptr_rvalue;
AggregateAccess agg;
- u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store");
- u32 src_size = api_mem_type_size(g, api_sv_type(&rv), "store");
+ u32 src_size;
+ u32 dst_size = cg_type_is_aggregate(g->c, ty)
+ ? api_mem_type_size(g, ty, "store")
+ : api_mem_type_size(g, api_sv_type(&base), "store");
u32 access_size = cg_type_is_aggregate(g->c, ty)
? api_mem_type_size(g, ty, "store")
: dst_size;
- if (!api_is_lvalue_sv(&rv)) {
+ src_ptr_rvalue =
+ !api_is_lvalue_sv(&rv) && cg_type_is_ptr(g->c, api_sv_type(&rv));
+ src_size = src_ptr_rvalue ? access_size
+ : api_mem_type_size(g, api_sv_type(&rv), "store");
+ if (!api_is_lvalue_sv(&rv) && !src_ptr_rvalue) {
compiler_panic(g->c, g->cur_loc,
"CfreeCg: aggregate store source is not an lvalue");
}
- if (!cg_type_is_aggregate(g->c, api_sv_type(&lv)) ||
- !cg_type_is_aggregate(g->c, api_sv_type(&rv)) ||
+ if ((is_lvalue && !cg_type_is_aggregate(g->c, api_sv_type(&base))) ||
+ (!src_ptr_rvalue && !cg_type_is_aggregate(g->c, api_sv_type(&rv))) ||
access_size != dst_size || access_size != src_size) {
compiler_panic(g->c, g->cur_loc,
"CfreeCg: store aggregate type/size mismatch: access "
@@ -404,45 +697,87 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) {
(unsigned)access_size, (unsigned)dst_size,
(unsigned)src_size);
}
- if (lv.source_local != CFREE_CG_LOCAL_NONE) {
- api_local_const_clear(api_local_from_handle(g, lv.source_local));
- } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL ||
+ if (base.source_local != CFREE_CG_LOCAL_NONE) {
+ api_local_const_clear(api_local_from_handle(g, base.source_local));
+ } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
(access.flags & CFREE_CG_MEM_VOLATILE)) {
api_local_const_memory_boundary(g);
}
- ptr_ty = cg_type_ptr_to(g->c, api_sv_type(&lv));
- dst_addr = api_lvalue_addr(g, &lv, ptr_ty);
- src_addr = api_lvalue_addr(g, &rv, ptr_ty);
+ ptr_ty = cg_type_ptr_to(g->c, ty);
+ if (is_lvalue) {
+ dst_addr = api_lvalue_addr(g, &base, ptr_ty);
+ dst_addr_owned = 1;
+ } else {
+ dst_addr = api_force_reg(g, &base, api_sv_type(&base));
+ dst_addr_owned = 0;
+ }
+ if (src_ptr_rvalue) {
+ src_addr = api_force_reg(g, &rv, api_sv_type(&rv));
+ src_addr_owned = 0;
+ } else {
+ src_addr = api_lvalue_addr(g, &rv, ptr_ty);
+ src_addr_owned = 1;
+ }
memset(&agg, 0, sizeof agg);
agg.size = access_size;
agg.align = access.align ? access.align
- : abi_cg_alignof(g->c->abi, api_sv_type(&lv));
+ : abi_cg_alignof(g->c->abi, ty);
T->copy_bytes(T, dst_addr, src_addr, agg);
- api_free_reg(g, dst_addr.v.reg, RC_INT);
- api_free_reg(g, src_addr.v.reg, RC_INT);
- api_release(g, &lv);
+ if (dst_addr_owned) api_free_reg(g, dst_addr.v.reg, RC_INT);
+ if (src_addr_owned) api_free_reg(g, src_addr.v.reg, RC_INT);
+ api_release(g, &base);
api_release(g, &rv);
return;
}
- api_validate_memory_value(g, "store", ty, api_sv_type(&rv));
- if (api_is_wide16_scalar_type(g->c, ty)) {
- if (lv.source_local != CFREE_CG_LOCAL_NONE) {
- api_local_const_clear(api_local_from_handle(g, lv.source_local));
- } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL ||
+
+ if (!is_bitfield) api_validate_memory_value(g, "store", ty, api_sv_type(&rv));
+
+ /* Wide-16 scalar store (no index, no bit-field). NOTE(review): ea.offset is
+ * honored only for an OPK_LOCAL lvalue base with an lvalue source. */
+ if (!has_index && !is_bitfield && api_is_wide16_scalar_type(g->c, ty)) {
+ if (base.source_local != CFREE_CG_LOCAL_NONE) {
+ api_local_const_clear(api_local_from_handle(g, base.source_local));
+ } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
(access.flags & CFREE_CG_MEM_VOLATILE)) {
api_local_const_memory_boundary(g);
}
if (api_is_lvalue_sv(&rv)) {
CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
- Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty);
- Operand src_addr = api_lvalue_addr(g, &rv, ptr_ty);
+ Operand dst_addr;
+ Operand src_addr;
+ int dst_addr_owned = 0;
+ int src_addr_owned = 0;
AggregateAccess agg;
+ if (is_lvalue && base.op.kind == OPK_LOCAL) {
+ if (ea.offset == 0) {
+ dst_addr = base.op;
+ } else {
+ dst_addr = fold_ea_into_operand(g, base.op, ea.offset, REG_NONE, 0,
+ ty, &owned_base);
+ dst_addr_owned = owned_base != REG_NONE;
+ }
+ } else if (is_lvalue) {
+ dst_addr = api_lvalue_addr(g, &base, ptr_ty);
+ dst_addr_owned = 1;
+ } else {
+ dst_addr = api_force_reg(g, &base, api_sv_type(&base));
+ }
+ if (rv.op.kind == OPK_LOCAL) {
+ src_addr = rv.op;
+ } else {
+ src_addr = api_lvalue_addr(g, &rv, ptr_ty);
+ src_addr_owned = 1;
+ }
memset(&agg, 0, sizeof agg);
agg.size = 16;
agg.align = access.align ? access.align : 16;
T->copy_bytes(T, dst_addr, src_addr, agg);
- api_free_reg(g, dst_addr.v.reg, RC_INT);
- api_free_reg(g, src_addr.v.reg, RC_INT);
+ if (dst_addr_owned) {
+ api_free_reg(g, dst_addr.kind == OPK_INDIRECT ? dst_addr.v.ind.base
+ : dst_addr.v.reg,
+ RC_INT);
+ }
+ if (src_addr_owned) api_free_reg(g, src_addr.v.reg, RC_INT);
} else if (rv.op.kind == OPK_IMM) {
u8 bytes[16];
u64 lo = (u64)rv.op.v.imm;
@@ -454,65 +789,125 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) {
bytes[lo_idx] = (u8)(lo >> (i * 8u));
bytes[hi_idx] = (u8)(hi >> (i * 8u));
}
- if (lv.op.kind == OPK_LOCAL) {
- api_store_f128_bytes(g, lv.op.v.frame_slot, ty, bytes);
+ if (base.op.kind == OPK_LOCAL) {
+ api_store_f128_bytes(g, base.op.v.frame_slot, ty, bytes);
} else {
FrameSlot slot = api_f128_temp_slot(g, ty);
ApiSValue tmp = api_make_lv(api_op_local(slot, ty), ty);
CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
- Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty);
+ Operand dst_addr;
Operand src_addr;
+ int dst_addr_owned = 0;
AggregateAccess agg;
api_store_f128_bytes(g, slot, ty, bytes);
+ if (is_lvalue) {
+ dst_addr = api_lvalue_addr(g, &base, ptr_ty);
+ dst_addr_owned = 1;
+ } else {
+ dst_addr = api_force_reg(g, &base, api_sv_type(&base));
+ }
src_addr = api_lvalue_addr(g, &tmp, ptr_ty);
memset(&agg, 0, sizeof agg);
agg.size = 16;
agg.align = access.align ? access.align : 16;
T->copy_bytes(T, dst_addr, src_addr, agg);
- api_free_reg(g, dst_addr.v.reg, RC_INT);
+ if (dst_addr_owned) api_free_reg(g, dst_addr.v.reg, RC_INT);
api_free_reg(g, src_addr.v.reg, RC_INT);
}
} else {
src = api_force_reg(g, &rv, ty);
- T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access));
+ T->store(T, base.op, src, api_mem_from_access(g, &base.op, access));
}
- api_release(g, &lv);
+ api_release(g, &base);
api_release(g, &rv);
return;
}
- api_ensure_reg(g, &lv);
+
+ /* General EA-shaped scalar / bit-field store. Resolve the EA into a
+ * single operand the backend can consume. */
+
+ /* Compute the source operand first so its register lifetime doesn't
+ * overlap any EA-arith we issue. */
api_ensure_reg(g, &rv);
if (api_sv_op_is_reg_or_imm(&rv)) {
src = rv.op;
} else {
src = api_force_reg(g, &rv, api_sv_type(&rv));
}
- if (lv.source_local != CFREE_CG_LOCAL_NONE) {
- if (src.kind == OPK_IMM) {
- api_local_const_store(g, lv.source_local, access, src.v.imm);
- } else {
- api_local_const_clear(api_local_from_handle(g, lv.source_local));
- }
- } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL ||
- (access.flags & CFREE_CG_MEM_VOLATILE)) {
- api_local_const_memory_boundary(g);
- }
- if (lv.source_local != CFREE_CG_LOCAL_NONE && lv.op.kind == OPK_REG) {
- Operand dst = lv.op;
+
+ /* Source-local register-resident lvalue, plain no-EA store: just copy
+ * into the bound hard register. This must run before the general EA path;
+ * otherwise api_lvalue_addr would unnecessarily home the local and mark its
+ * address taken. */
+ if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue &&
+ base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_REG) {
+ Operand dst = base.op;
dst.type = ty;
if (src.kind == OPK_IMM) {
T->load_imm(T, dst, src.v.imm);
- } else if (src.kind == OPK_REG) {
- if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src);
+ api_local_const_store(g, base.source_local, access, src.v.imm);
} else {
- src = api_force_reg(g, &rv, ty);
+ if (src.kind != OPK_REG) src = api_force_reg(g, &rv, ty);
if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src);
+ api_local_const_clear(api_local_from_handle(g, base.source_local));
+ }
+ api_release(g, &base);
+ api_release(g, &rv);
+ if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT);
+ return;
+ }
+
+ if (is_lvalue) {
+ if (!api_operand_can_address(&base.op)) {
+ CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base));
+ Operand addr = api_lvalue_addr(g, &base, pty);
+ mem_op = fold_ea_into_operand(g, addr, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
+ if (owned_base == REG_NONE) owned_base = addr.v.reg;
+ else if (owned_base != addr.v.reg) api_free_reg(g, addr.v.reg, RC_INT);
+ } else {
+ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
}
+ } else if (base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL) {
+ /* Pointer-rvalue OPK_GLOBAL: fold EA directly. */
+ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
} else {
- T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access));
+ CfreeCgTypeId pty = api_sv_type(&base);
+ Operand ptr_op = api_force_reg(g, &base, pty);
+ mem_op = fold_ea_into_operand(g, ptr_op, ea.offset, owned_index,
+ log2_scale, access_ty, &owned_base);
+ }
+
+ /* Source-local tracking. Only the plain no-EA scalar-to-scalar store can
+ * fold into a tracked constant; everything else clears tracking. */
+ if (is_lvalue && base.source_local != CFREE_CG_LOCAL_NONE) {
+ if (!has_index && !is_bitfield && ea.offset == 0 && src.kind == OPK_IMM) {
+ api_local_const_store(g, base.source_local, access, src.v.imm);
+ } else {
+ api_local_const_clear(api_local_from_handle(g, base.source_local));
+ }
+ } else if (is_lvalue &&
+ (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
+ (access.flags & CFREE_CG_MEM_VOLATILE))) {
+ api_local_const_memory_boundary(g);
+ } else if (!is_lvalue) {
+ /* Store through pointer is a memory write -- be conservative. */
+ api_local_const_memory_boundary(g);
}
- api_release(g, &lv);
+
+ if (is_bitfield) {
+ BitFieldAccess bf = bf_from_access(g, access, access_ty, &mem_op);
+ T->bitfield_store(T, mem_op, src, bf);
+ } else {
+ T->store(T, mem_op, src, api_mem_from_access(g, &mem_op, access));
+ }
+
+ api_release(g, &base);
api_release(g, &rv);
+ if (owned_base != REG_NONE) api_free_reg(g, owned_base, RC_INT);
+ if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT);
}
/* ============================================================
@@ -533,7 +928,20 @@ void cfree_cg_dup(CfreeCg* g) {
if (v.res == RES_FIXED_REG && !api_is_lvalue_sv(&v) &&
v.op.kind == OPK_REG) {
ty = api_owned_reg_type(g, &v);
- r = api_alloc_reg_or_spill(g, api_class_of_sv(&v), ty);
+ r = api_alloc_reg(g, api_class_of_sv(&v));
+ if (r == (Reg)REG_NONE) {
+ FrameSlot slot = api_take_spill_slot(g, api_class_of_sv(&v));
+ Operand src = api_op_reg((Reg)api_reg_of_sv(&v), ty);
+ g->target->spill_reg(g->target, src, slot, api_mem_for_spill(g, &v));
+ g->stack[g->sp - 1].spill_slot = slot;
+ g->stack[g->sp - 1].res = RES_SPILLED;
+ api_set_owned_reg(&g->stack[g->sp - 1], (Reg)REG_NONE);
+ dup = v;
+ dup.pinned = 0;
+ dup.spill_slot = FRAME_SLOT_NONE;
+ api_push(g, dup);
+ return;
+ }
dst = api_op_reg(r, ty);
g->target->copy(g->target, dst,
api_op_reg((Reg)api_reg_of_sv(&v), ty));
@@ -552,7 +960,21 @@ void cfree_cg_dup(CfreeCg* g) {
}
top->pinned = 1;
ty = api_owned_reg_type(g, &v);
- r = api_alloc_reg_or_spill(g, api_class_of_sv(&v), ty);
+ r = api_alloc_reg(g, api_class_of_sv(&v));
+ if (r == (Reg)REG_NONE) {
+ FrameSlot slot = api_take_spill_slot(g, api_class_of_sv(&v));
+ Operand src = api_op_reg((Reg)api_reg_of_sv(&v), ty);
+ g->target->spill_reg(g->target, src, slot, api_mem_for_spill(g, &v));
+ top->pinned = 0;
+ top->spill_slot = slot;
+ top->res = RES_SPILLED;
+ api_set_owned_reg(top, (Reg)REG_NONE);
+ dup = v;
+ dup.pinned = 0;
+ dup.spill_slot = FRAME_SLOT_NONE;
+ api_push(g, dup);
+ return;
+ }
dst = api_op_reg(r, ty);
g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty));
g->stack[g->sp - 1].pinned = 0;
@@ -564,6 +986,37 @@ void cfree_cg_dup(CfreeCg* g) {
api_push(g, dup);
}
+/* Duplicate the top two stack slots. The lower of the two is the deeper
+ * element; the higher is TOS. After dup2, the stack contains [a, b, a, b]
+ * where TOS was [..., a, b]. Used to support compound assignment through a
+ * scaled-index lvalue: the frontend duplicates [base, index] so it can
+ * read-modify-write with a single EA expression each side.
+ *
+ * The current implementation is built from per-slot primitives (swap, dup,
+ * rot3, dup, rot3, swap) so register/operand sharing stays correct under the
+ * per-slot machinery. */
+void cfree_cg_dup2(CfreeCg* g) {
+ if (!g || g->sp < 2) return;
+ /* Stack: [..., a, b]
+ * Step 1: dup the lower (a). We push under TOS by first swapping. */
+ cfree_cg_swap(g); /* [..., b, a] */
+ cfree_cg_dup(g); /* [..., b, a, a] */
+ cfree_cg_rot3(g); /* [..., a, a, b] */
+ cfree_cg_dup(g); /* [..., a, a, b, b] */
+ /* Now: [..., a, a, b, b]; we want [..., a, b, a, b]. */
+ /* swap middle two: this is the [..., x, a, b, y]-shaped rotation. We
+ * implement it by rot3 then swap. */
+ /* Current: ..., a, a, b, b indices (from top): 0=b, 1=b, 2=a, 3=a
+ *
+ * Want: ..., a, b, a, b. Difference: positions 1 (b) and 2 (a) should
+ * swap. We accomplish that by:
+ * rot3 : [..., a, b, b, a] (rotate top 3 forward)
+ * swap : [..., a, b, a, b]
+ */
+ cfree_cg_rot3(g);
+ cfree_cg_swap(g);
+}
+
void cfree_cg_swap(CfreeCg* g) {
ApiSValue tmp;
if (!g || g->sp < 2) return;
diff --git a/src/cg/value.c b/src/cg/value.c
@@ -85,6 +85,22 @@ Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty) {
o.cls = RC_INT;
o.type = ty;
o.v.ind.base = base;
+ o.v.ind.index = REG_NONE;
+ o.v.ind.log2_scale = 0;
+ o.v.ind.ofs = ofs;
+ return o;
+}
+
+Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs,
+ CfreeCgTypeId ty) {
+ Operand o;
+ memset(&o, 0, sizeof o);
+ o.kind = OPK_INDIRECT;
+ o.cls = RC_INT;
+ o.type = ty;
+ o.v.ind.base = base;
+ o.v.ind.index = index;
+ o.v.ind.log2_scale = log2_scale;
o.v.ind.ofs = ofs;
return o;
}
@@ -1177,7 +1193,6 @@ int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b,
return 1;
}
-int api_source_flags_addr_taken(u32 flags);
ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local);
void api_local_const_clear(ApiSourceLocal* rec) {
@@ -1235,7 +1250,6 @@ int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec,
u64 local_size;
if (!g || !rec) return 0;
if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0;
- if (api_source_flags_addr_taken(rec->attrs.flags)) return 0;
if (access.flags & CFREE_CG_MEM_VOLATILE) return 0;
ty = resolve_type(g->c, access.type);
if (!ty) ty = rec->type;
@@ -1281,8 +1295,10 @@ int api_local_const_load(CfreeCg* g, CfreeCgLocal local,
}
int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags) {
- u32 width;
- return g && !flags && api_foldable_int_type(g->c, ty, &width);
+ (void)g;
+ (void)ty;
+ (void)flags;
+ return 0; /* NOTE(review): delaying is now always refused; confirm intent */
}
int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) {
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -367,6 +367,7 @@ typedef struct OptPRegInfo {
typedef enum OptUseKind {
OPT_USE_OPERAND,
OPT_USE_INDIRECT_BASE,
+ OPT_USE_INDIRECT_INDEX, /* value is v.ind.index of an OPK_INDIRECT operand */
OPT_USE_PHI_INPUT,
} OptUseKind;
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -75,6 +75,8 @@ static void ensure_operand(Func* f, const Operand* op) {
ir_ensure_preg(f, (PReg)op->v.reg, op->type, op->cls);
} else if (op->kind == OPK_INDIRECT) {
ir_ensure_preg(f, (PReg)op->v.ind.base, 0, RC_INT);
+ if (op->v.ind.index != (Reg)REG_NONE)
+ ir_ensure_preg(f, (PReg)op->v.ind.index, 0, RC_INT);
}
}
@@ -260,7 +262,10 @@ static int inst_defines_val(const Inst* in, Val v) {
static int op_uses_reg(const Operand* op, Reg reg) {
if (!op) return 0;
if (op->kind == OPK_REG && op->v.reg == reg) return 1;
- if (op->kind == OPK_INDIRECT && op->v.ind.base == reg) return 1;
+ if (op->kind == OPK_INDIRECT &&
+ (op->v.ind.base == reg ||
+ (op->v.ind.index != (Reg)REG_NONE && op->v.ind.index == reg)))
+ return 1;
return 0;
}
diff --git a/src/opt/opt_util.c b/src/opt/opt_util.c
@@ -33,6 +33,16 @@ void opt_walk_operand(Func* f, Inst* in, Operand* op, int is_def,
base.type = opt_reg_type(f, base.v.reg);
fn(f, in, &base, 0, ctx);
op->v.ind.base = base.v.reg;
+ if (op->v.ind.index != (Reg)REG_NONE) {
+ Operand idx = *op;
+ idx.kind = OPK_REG;
+ idx.cls = RC_INT;
+ idx.v.reg = op->v.ind.index;
+ if ((PReg)idx.v.reg < opt_reg_count(f) && opt_reg_type(f, idx.v.reg))
+ idx.type = opt_reg_type(f, idx.v.reg);
+ fn(f, in, &idx, 0, ctx);
+ op->v.ind.index = idx.v.reg;
+ }
}
}
diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c
@@ -240,6 +240,10 @@ static void opt_use_add_operand(Func* f, u32 b, u32 i, u32 op_idx, Operand* op,
} else if (op->kind == OPK_INDIRECT) {
opt_use_add(f, (Val)op->v.ind.base, b, i, OPT_USE_INDIRECT_BASE, op_idx,
OPT_USE_NONE, op);
+ if (op->v.ind.index != (Reg)REG_NONE) {
+ opt_use_add(f, (Val)op->v.ind.index, b, i, OPT_USE_INDIRECT_INDEX, op_idx,
+ OPT_USE_NONE, op);
+ }
}
}
@@ -426,6 +430,13 @@ static void verify_use_site(Func* f, const char* stage, const OptUse* use) {
(Val)use->operand->v.ind.base != use->val)
opt_fail(f, stage, "def-use indirect mismatch", use->val, use->kind);
break;
+ case OPT_USE_INDIRECT_INDEX:
+ if (!use->operand || use->operand->kind != OPK_INDIRECT ||
+ use->operand->v.ind.index == (Reg)REG_NONE ||
+ (Val)use->operand->v.ind.index != use->val)
+ opt_fail(f, stage, "def-use indirect index mismatch", use->val,
+ use->kind);
+ break;
case OPT_USE_PHI_INPUT: {
if ((IROp)in->op != IR_PHI)
opt_fail(f, stage, "def-use phi site mismatch", use->block, use->inst);
diff --git a/src/opt/pass_copy.c b/src/opt/pass_copy.c
@@ -52,6 +52,9 @@ static void replace_one_use(Func* f, const OptUse* use, Val src) {
case OPT_USE_INDIRECT_BASE:
use->operand->v.ind.base = (Reg)src;
break;
+ case OPT_USE_INDIRECT_INDEX:
+ use->operand->v.ind.index = (Reg)src;
+ break;
case OPT_USE_PHI_INPUT: {
IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
if (aux && use->phi_pred_index < aux->npreds)
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -115,8 +115,11 @@ static Operand xlat_op(ReplayCtx* r, Operand op) {
op.v.frame_slot = slot_to_target(r, op.v.frame_slot);
return op;
case OPK_INDIRECT:
- if (!(r->identity_regs && r->f->opt_rewritten))
+ if (!(r->identity_regs && r->f->opt_rewritten)) {
op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base);
+ if (op.v.ind.index != (Reg)REG_NONE)
+ op.v.ind.index = val_to_target_reg(r, (Val)op.v.ind.index);
+ }
return op;
}
return op;
@@ -171,7 +174,10 @@ static int operand_uses_reg_for_replay(const Operand* op, const Operand* r) {
if (!op || !r || r->kind != OPK_REG) return 0;
if (op->kind == OPK_REG) return operand_reg_eq(op, r);
if (op->kind == OPK_INDIRECT)
- return r->cls == RC_INT && op->v.ind.base == r->v.reg;
+ return r->cls == RC_INT &&
+ (op->v.ind.base == r->v.reg ||
+ (op->v.ind.index != (Reg)REG_NONE &&
+ op->v.ind.index == r->v.reg));
return 0;
}
@@ -850,7 +856,11 @@ static void collect_replayed_operand_reg(const Operand* op, RegClass cls,
if (op->kind == OPK_REG) {
if (op->cls == cls) add_unique_reg(used, nused, cap, op->v.reg);
} else if (op->kind == OPK_INDIRECT) {
- if (cls == RC_INT) add_unique_reg(used, nused, cap, op->v.ind.base);
+ if (cls == RC_INT) {
+ add_unique_reg(used, nused, cap, op->v.ind.base);
+ if (op->v.ind.index != (Reg)REG_NONE)
+ add_unique_reg(used, nused, cap, op->v.ind.index);
+ }
}
}
diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c
@@ -57,6 +57,8 @@ static void hard_use_operand(OptHardRegSet* s, const Operand* op) {
hard_add(s, op->cls, op->v.reg);
} else if (op->kind == OPK_INDIRECT) {
hard_add(s, RC_INT, op->v.ind.base);
+ if (op->v.ind.index != (Reg)REG_NONE)
+ hard_add(s, RC_INT, op->v.ind.index);
}
}
diff --git a/src/opt/pass_inline.c b/src/opt/pass_inline.c
@@ -244,6 +244,8 @@ static Operand map_operand(InlineMap* m, Operand op) {
break;
case OPK_INDIRECT:
op.v.ind.base = map_preg(m, (PReg)op.v.ind.base);
+ if (op.v.ind.index != (Reg)REG_NONE)
+ op.v.ind.index = map_preg(m, (PReg)op.v.ind.index);
break;
default:
break;
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -74,13 +74,20 @@ typedef struct GvnConst {
} GvnConst;
typedef struct GvnOperandKey {
- u8 kind; /* OPK_REG or OPK_IMM */
+ u8 kind; /* OPK_* tag: OPK_REG -> v.reg, OPK_INDIRECT -> v.ind, else v.imm */
u8 cls;
u16 pad;
CfreeCgTypeId type;
union {
Val reg;
i64 imm;
+ struct { /* key for an OPK_INDIRECT address operand */
+ Val base;
+ Val index; /* VAL_NONE when the operand has no index register */
+ i32 ofs;
+ u8 log2_scale;
+ u8 pad[3];
+ } ind;
} v;
} GvnOperandKey;
@@ -534,7 +541,11 @@ static int val_def_inst(Func* f, Val v, Inst** out) {
return 1;
}
-static int addr_use_foldable(Func* f, const OptUse* use) {
+/* Use classification for the SSA-namespace addr-xform; mirrors the PReg
+ * variant. Returns 0 for escapes, 1 for zero-EA folds (rewrite to
+ * OPK_LOCAL), 2 for EA-shaped folds (leave the OPK_INDIRECT alone so the
+ * EA stays on the load/store). */
+static int addr_use_foldable_kind(Func* f, const OptUse* use) {
if (!use || use->kind != OPT_USE_INDIRECT_BASE) return 0;
if (use->block >= f->nblocks || use->inst >= f->blocks[use->block].ninsts)
return 0;
@@ -542,19 +553,25 @@ static int addr_use_foldable(Func* f, const OptUse* use) {
if ((IROp)in->op != IR_LOAD && (IROp)in->op != IR_STORE) return 0;
if (opt_mem_observable(&in->extra.mem)) return 0;
if (!use->operand || use->operand->kind != OPK_INDIRECT) return 0;
- if (use->operand->v.ind.ofs != 0) return 0;
if ((IROp)in->op == IR_LOAD && use->operand_index != 1u) return 0;
if ((IROp)in->op == IR_STORE && use->operand_index != 0u) return 0;
- return 1;
+ if (use->operand->v.ind.ofs == 0 &&
+ use->operand->v.ind.index == (Reg)REG_NONE)
+ return 1;
+ return 2;
}
-static int addr_all_uses_foldable(Func* f, Val v) {
+static int addr_all_uses_foldable(Func* f, Val v, int* out_has_ea) {
u32 nuses = 0;
+ int has_ea = 0;
for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE;
u = f->opt_uses[u].next_for_val) {
++nuses;
- if (!addr_use_foldable(f, &f->opt_uses[u])) return 0;
+ int k = addr_use_foldable_kind(f, &f->opt_uses[u]);
+ if (!k) return 0;
+ if (k == 2) has_ea = 1;
}
+ if (out_has_ea) *out_has_ea = has_ea;
return nuses != 0;
}
@@ -577,17 +594,24 @@ void opt_addr_xform(Func* f) {
if (!addr_def_inst(f, v, &def)) continue;
Operand lv = def->opnds[1];
if (lv.kind != OPK_LOCAL) continue;
- if (!addr_all_uses_foldable(f, v)) continue;
+ int has_ea = 0;
+ if (!addr_all_uses_foldable(f, v, &has_ea)) continue;
+ /* Rewrite zero-EA uses to OPK_LOCAL; leave EA-shaped uses as
+ * OPK_INDIRECT(p, ofs, index, log2_scale). When any EA-shaped use
+ * remains, the IR_ADDR_OF def must stay alive to feed its base. */
for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE;
u = f->opt_uses[u].next_for_val) {
OptUse* use = &f->opt_uses[u];
+ Operand* op = use->operand;
+ if (!op || op->kind != OPK_INDIRECT) continue;
+ if (op->v.ind.ofs != 0 || op->v.ind.index != (Reg)REG_NONE) continue;
Inst* mem = &f->blocks[use->block].insts[use->inst];
Operand folded = lv;
folded.type = mem->extra.mem.type ? mem->extra.mem.type : lv.type;
- *use->operand = folded;
+ *op = folded;
}
- addr_inst_remove(def);
+ if (!has_ea) addr_inst_remove(def);
changed = 1;
}
if (changed)
@@ -598,16 +622,36 @@ void opt_addr_xform(Func* f) {
/* PReg-namespace variant of opt_addr_xform for the O1 pipeline (no SSA, no
* Val-keyed def-use chains). Scans the whole function once per candidate
- * IR_ADDR_OF def to classify uses of its PReg result. The candidate is
- * foldable only if every use is the base of a non-observable IR_LOAD/STORE
- * with zero offset and the correct main-operand index. Folding rewrites
- * those uses from `OPK_INDIRECT(base=p, ofs=0)` to `OPK_LOCAL(local)` and
- * replaces the IR_ADDR_OF with IR_NOP. */
-
-static int addr_xform_pregs_main_op_foldable(Inst* in, Operand* op,
- u32 op_idx) {
- if (op->kind != OPK_INDIRECT) return 0;
- if (op->v.ind.ofs != 0) return 0;
+ * IR_ADDR_OF def to classify uses of its PReg result.
+ *
+ * Use classifications (see addr_xform_pregs_classify_use):
+ *
+ * OPF_ESCAPE The use is something other than a non-observable
+ * IR_LOAD/IR_STORE base operand. The IR_ADDR_OF cannot
+ * be folded; the local's address truly escapes.
+ * OPF_FOLD_LOCAL Zero-EA use: `OPK_INDIRECT(base=p, ofs=0, index=NONE)`
+ * in load/store base position. Foldable to OPK_LOCAL.
+ * OPF_FOLD_EA EA-shaped use: same load/store base position, but with
+ * nonzero `ofs` or `index != REG_NONE`. The EA must stay
+ * on the load/store (the operand layout for OPK_LOCAL
+ * cannot carry the EA today), so the operand is left
+ * alone and the IR_ADDR_OF def must stay alive to feed
+ * the OPK_INDIRECT base. The use is still recognized as
+ * "non-escape" for downstream analysis (e.g. scalar
+ * promotion's non-escape check).
+ *
+ * After classification: if any use is OPF_ESCAPE, no rewrite happens. If
+ * every use is OPF_FOLD_LOCAL, fold all uses to OPK_LOCAL and NOP the
+ * IR_ADDR_OF. If a mix of OPF_FOLD_LOCAL and OPF_FOLD_EA, fold the
+ * zero-EA uses but keep the IR_ADDR_OF alive for the EA-shaped uses. */
+
+typedef enum AddrXformUseClass {
+ OPF_ESCAPE = 0, /* not a foldable load/store address use: blocks the fold */
+ OPF_FOLD_LOCAL = 1, /* ofs == 0 and no index: rewritten to OPK_LOCAL */
+ OPF_FOLD_EA = 2, /* nonzero ofs or an index: operand is left untouched */
+} AddrXformUseClass;
+
+static int addr_xform_pregs_main_op_position_ok(Inst* in, u32 op_idx) {
if ((IROp)in->op != IR_LOAD && (IROp)in->op != IR_STORE) return 0;
if (opt_mem_observable(&in->extra.mem)) return 0;
if ((IROp)in->op == IR_LOAD && op_idx != 1u) return 0;
@@ -615,10 +659,24 @@ static int addr_xform_pregs_main_op_foldable(Inst* in, Operand* op,
return 1;
}
+static AddrXformUseClass addr_xform_pregs_classify_use(Inst* in, Operand* op,
+ u32 op_idx) {
+ if (op->kind != OPK_INDIRECT) return OPF_ESCAPE; /* not a memory operand */
+ if (!addr_xform_pregs_main_op_position_ok(in, op_idx)) return OPF_ESCAPE;
+ if (op->v.ind.ofs == 0 && op->v.ind.index == (Reg)REG_NONE)
+ return OPF_FOLD_LOCAL; /* bare [base]: foldable to OPK_LOCAL */
+ return OPF_FOLD_EA; /* NOTE(review): also hit when p is only the index */
+}
+
+
static int addr_xform_pregs_op_uses(const Operand* op, PReg p) {
if (!op) return 0;
if (op->kind == OPK_REG && (PReg)op->v.reg == p) return 1;
- if (op->kind == OPK_INDIRECT && (PReg)op->v.ind.base == p) return 1;
+ if (op->kind == OPK_INDIRECT) {
+ if ((PReg)op->v.ind.base == p) return 1;
+ if (op->v.ind.index != (Reg)REG_NONE && (PReg)op->v.ind.index == p)
+ return 1;
+ }
return 0;
}
@@ -684,8 +742,14 @@ static int addr_xform_pregs_aux_uses(Inst* in, PReg p) {
}
}
-static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst) {
+/* Returns nonzero if every use of `p` is foldable (OPF_FOLD_LOCAL or
+ * OPF_FOLD_EA) and at least one use exists. *out_has_ea is set to 1 if any
+ * use was OPF_FOLD_EA; in that case the rewrite must keep the IR_ADDR_OF
+ * alive (the EA-shaped use still names p as the OPK_INDIRECT base). */
+static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst,
+ int* out_has_ea) {
int has_foldable_use = 0;
+ int has_ea = 0;
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
for (u32 i = 0; i < bl->ninsts; ++i) {
@@ -694,14 +758,15 @@ static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst) {
for (u32 o = 0; o < in->nopnds; ++o) {
Operand* op = &in->opnds[o];
if (!addr_xform_pregs_op_uses(op, p)) continue;
- if (addr_xform_pregs_main_op_foldable(in, op, o))
- has_foldable_use = 1;
- else
- return 0;
+ AddrXformUseClass uc = addr_xform_pregs_classify_use(in, op, o);
+ if (uc == OPF_ESCAPE) return 0;
+ has_foldable_use = 1;
+ if (uc == OPF_FOLD_EA) has_ea = 1;
}
if (addr_xform_pregs_aux_uses(in, p)) return 0;
}
}
+ if (out_has_ea) *out_has_ea = has_ea;
return has_foldable_use;
}
@@ -718,8 +783,12 @@ void opt_addr_xform_pregs(Func* f) {
if (in->opnds[1].kind != OPK_LOCAL) continue;
PReg p = (PReg)in->opnds[0].v.reg;
if (!opt_reg_valid(f, p)) continue;
- if (!addr_xform_pregs_classify(f, p, in)) continue;
+ int has_ea = 0;
+ if (!addr_xform_pregs_classify(f, p, in, &has_ea)) continue;
Operand local = in->opnds[1];
+ /* Fold every zero-EA use of p to OPK_LOCAL. EA-shaped uses are left
+ * as OPK_INDIRECT(base=p, ofs, index, log2_scale) so the EA stays on
+ * the load/store; the IR_ADDR_OF def must survive to feed them. */
for (u32 bb = 0; bb < f->nblocks; ++bb) {
Block* rb = &f->blocks[bb];
for (u32 ii = 0; ii < rb->ninsts; ++ii) {
@@ -1039,11 +1108,15 @@ static u64 gvn_key_hash(const GvnKey* k) {
h = gvn_mix_u64(h, k->ops[i].kind);
h = gvn_mix_u64(h, k->ops[i].cls);
h = gvn_mix_u64(h, k->ops[i].type);
- h = gvn_mix_u64(h,
- (k->ops[i].kind == OPK_REG ||
- k->ops[i].kind == OPK_INDIRECT)
- ? k->ops[i].v.reg
- : (u64)k->ops[i].v.imm);
+ if (k->ops[i].kind == OPK_INDIRECT) {
+ h = gvn_mix_u64(h, k->ops[i].v.ind.base);
+ h = gvn_mix_u64(h, k->ops[i].v.ind.index);
+ h = gvn_mix_u64(h, (u64)(i64)k->ops[i].v.ind.ofs);
+ h = gvn_mix_u64(h, k->ops[i].v.ind.log2_scale);
+ } else {
+ h = gvn_mix_u64(h, k->ops[i].kind == OPK_REG ? k->ops[i].v.reg
+ : (u64)k->ops[i].v.imm);
+ }
}
return h;
}
@@ -1051,8 +1124,12 @@ static u64 gvn_key_hash(const GvnKey* k) {
static int gvn_operand_key_equal(const GvnOperandKey* a,
const GvnOperandKey* b) {
if (a->kind != b->kind || a->cls != b->cls || a->type != b->type) return 0;
- if (a->kind == OPK_REG || a->kind == OPK_INDIRECT)
- return a->v.reg == b->v.reg;
+ if (a->kind == OPK_INDIRECT)
+ return a->v.ind.base == b->v.ind.base &&
+ a->v.ind.index == b->v.ind.index &&
+ a->v.ind.ofs == b->v.ind.ofs &&
+ a->v.ind.log2_scale == b->v.ind.log2_scale;
+ if (a->kind == OPK_REG) return a->v.reg == b->v.reg;
return a->v.imm == b->v.imm;
}
@@ -1177,9 +1254,17 @@ static int gvn_make_addr_operand_key(GvnCtx* ctx, const Operand* op,
return 1;
}
case OPK_INDIRECT: {
- Val v = gvn_find(ctx, (Val)op->v.ind.base);
- if (v == VAL_NONE || v >= ctx->f->nvals) return 0;
- out->v.reg = v;
+ Val base = gvn_find(ctx, (Val)op->v.ind.base);
+ if (base == VAL_NONE || base >= ctx->f->nvals) return 0;
+ out->v.ind.base = base;
+ out->v.ind.index = VAL_NONE;
+ if (op->v.ind.index != REG_NONE) {
+ Val index = gvn_find(ctx, (Val)op->v.ind.index);
+ if (index == VAL_NONE || index >= ctx->f->nvals) return 0;
+ out->v.ind.index = index;
+ out->v.ind.log2_scale = op->v.ind.log2_scale;
+ }
+ out->v.ind.ofs = op->v.ind.ofs;
return 1;
}
case OPK_LOCAL:
@@ -1272,6 +1357,7 @@ static int gvn_mem_root_from_access(GvnCtx* ctx, const Operand* addr,
break;
case OPK_INDIRECT:
offset = addr->v.ind.ofs;
+ if (addr->v.ind.index != REG_NONE) singleton = 0;
if (ctx) {
Val base = gvn_find(ctx, (Val)addr->v.ind.base);
u8 akind;
@@ -1283,7 +1369,7 @@ static int gvn_mem_root_from_access(GvnCtx* ctx, const Operand* addr,
kind = akind;
id = aid;
offset += aofs;
- singleton = asing;
+ singleton = asing && addr->v.ind.index == REG_NONE;
}
}
break;
@@ -1507,6 +1593,14 @@ static int gvn_operand_key_less(const GvnOperandKey* a,
if (a->kind != b->kind) return a->kind < b->kind;
if (a->type != b->type) return a->type < b->type;
if (a->cls != b->cls) return a->cls < b->cls;
+ if (a->kind == OPK_INDIRECT) {
+ if (a->v.ind.base != b->v.ind.base)
+ return a->v.ind.base < b->v.ind.base;
+ if (a->v.ind.index != b->v.ind.index)
+ return a->v.ind.index < b->v.ind.index;
+ if (a->v.ind.ofs != b->v.ind.ofs) return a->v.ind.ofs < b->v.ind.ofs;
+ return a->v.ind.log2_scale < b->v.ind.log2_scale;
+ }
if (a->kind == OPK_REG) return a->v.reg < b->v.reg;
return a->v.imm < b->v.imm;
}
@@ -1585,6 +1679,9 @@ static void gvn_replace_one_use(Func* f, const OptUse* use, Val repl) {
case OPT_USE_INDIRECT_BASE:
use->operand->v.ind.base = (Reg)repl;
break;
+ case OPT_USE_INDIRECT_INDEX:
+ use->operand->v.ind.index = (Reg)repl;
+ break;
case OPT_USE_PHI_INPUT: {
IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
if (aux && use->phi_pred_index < aux->npreds)
@@ -2840,7 +2937,9 @@ static int ssa_combine_fold_addr_uses(Func* f) {
for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE;
u = f->opt_uses[u].next_for_val) {
OptUse* use = &f->opt_uses[u];
- if (!addr_use_foldable(f, use)) continue;
+ /* Only fold zero-EA uses to OPK_LOCAL. EA-shaped uses keep the EA
+ * on the load/store; OPK_LOCAL cannot carry the offset/index. */
+ if (addr_use_foldable_kind(f, use) != 1) continue;
Inst* mem = &f->blocks[use->block].insts[use->inst];
Operand folded = addr;
folded.type = mem->extra.mem.type ? mem->extra.mem.type : addr.type;
diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c
@@ -117,15 +117,29 @@ static int aux_has_slot(const Inst* in, u32 slot_id) {
return 0;
}
-static int slot_access_promotable(const Inst* in, u32 slot_id) {
+static int slot_access_promotable(const Func* f, const Inst* in,
+ u32 slot_id) {
if ((IROp)in->op == IR_LOAD) {
if (in->nopnds < 2 || opnd_slot_id(&in->opnds[1]) != slot_id) return 1;
- return in->opnds[0].kind == OPK_REG && !opt_mem_observable(&in->extra.mem);
+ if (in->opnds[0].kind != OPK_REG || opt_mem_observable(&in->extra.mem))
+ return 0;
+ /* Post-EA cg layer can produce LOAD opnds[1]=OPK_LOCAL(slot) with an
+ * access type that differs from the slot's declared type (e.g. a
+ * sub-word read for type-punning). mem2reg would silently lose those
+ * bits, so block promotion when the access type does not match the
+ * slot's declared type. */
+ const IRFrameSlot* s = &f->frame_slots[slot_id - 1u];
+ CfreeCgTypeId at = in->extra.mem.type;
+ return !at || at == s->type;
}
if ((IROp)in->op == IR_STORE) {
if (in->nopnds < 2 || opnd_slot_id(&in->opnds[0]) != slot_id) return 1;
if (opt_mem_observable(&in->extra.mem)) return 0;
- return in->opnds[1].kind == OPK_REG || in->opnds[1].kind == OPK_IMM;
+ if (in->opnds[1].kind != OPK_REG && in->opnds[1].kind != OPK_IMM)
+ return 0;
+ const IRFrameSlot* s = &f->frame_slots[slot_id - 1u];
+ CfreeCgTypeId at = in->extra.mem.type;
+ return !at || at == s->type;
}
for (u32 i = 0; i < in->nopnds; ++i)
if (opnd_slot_id(&in->opnds[i]) == slot_id) return 0;
@@ -144,7 +158,7 @@ static u8* find_promoted_slots(Func* f) {
for (u32 i = 0; i < bl->ninsts; ++i) {
Inst* in = &bl->insts[i];
for (u32 sid = 1; sid <= f->nframe_slots; ++sid) {
- if (promoted[sid] && !slot_access_promotable(in, sid))
+ if (promoted[sid] && !slot_access_promotable(f, in, sid))
promoted[sid] = 0;
}
}
@@ -283,6 +297,11 @@ static void reg_replace_use(RegRenameCtx* ctx, Operand* op) {
Reg r = op->v.ind.base;
Val v = reg_stack_top(ctx, r);
if (v != VAL_NONE) op->v.ind.base = (Reg)v;
+ if (op->v.ind.index != (Reg)REG_NONE) {
+ Reg ri = op->v.ind.index;
+ Val vi = reg_stack_top(ctx, ri);
+ if (vi != VAL_NONE) op->v.ind.index = (Reg)vi;
+ }
}
}
diff --git a/test/api/cg_switch_test.c b/test/api/cg_switch_test.c
@@ -166,9 +166,11 @@ static void build_switch_fn(CfreeCompiler* c, CfreeCgTypeId i32_ty,
/* Push selector, dispatch. */
cfree_cg_push_local(cg, param);
- cfree_cg_load(cg, (CfreeCgMemAccess){
- .type = sh->selector_type,
- .align = cfree_cg_type_align(c, sh->selector_type)});
+ cfree_cg_load(cg,
+ (CfreeCgMemAccess){
+ .type = sh->selector_type,
+ .align = cfree_cg_type_align(c, sh->selector_type)},
+ (CfreeCgEffAddr){0, 0});
memset(&sw, 0, sizeof sw);
sw.selector_type = sh->selector_type;
sw.default_label = default_lbl;
diff --git a/test/api/cg_type_test.c b/test/api/cg_type_test.c
@@ -129,7 +129,6 @@ static void exercise_cg_handles(CfreeCompiler* c, CfreeCgTypeId i32_ty,
cfree_cg_func_begin(cg, sym);
memset(&attrs, 0, sizeof(attrs));
attrs.name = cfree_sym_intern(c, "p");
- attrs.flags = CFREE_CG_LOCAL_ADDRESS_TAKEN;
param = cfree_cg_param(cg, 0, i32_ty, attrs);
attrs.name = cfree_sym_intern(c, "x");
local = cfree_cg_local(cg, i32_ty, attrs);
@@ -145,13 +144,13 @@ static void exercise_cg_handles(CfreeCompiler* c, CfreeCgTypeId i32_ty,
cfree_cg_push_local(cg, local);
cfree_cg_push_local_addr(cg, param);
- cfree_cg_indirect(cg);
- cfree_cg_load(cg, mem);
- cfree_cg_store(cg, mem);
+ /* removed: cfree_cg_indirect no longer needed */
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local_addr(cg, local);
- cfree_cg_indirect(cg);
- cfree_cg_load(cg, mem);
+ /* removed: cfree_cg_indirect no longer needed */
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_ret(cg);
cfree_cg_func_end(cg);
@@ -213,9 +212,9 @@ static void exercise_cg_scalar_local(CfreeCompiler* c, CfreeCgTypeId i32_ty,
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 40, i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(cg, local);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 2, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_ret(cg);
@@ -279,10 +278,10 @@ static void exercise_cg_late_local_addr(CfreeCompiler* c, CfreeCgTypeId i32_ty,
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 41, i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local_addr(cg, local);
- cfree_cg_indirect(cg);
- cfree_cg_load(cg, mem);
+ /* removed: cfree_cg_indirect no longer needed */
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 1, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_ret(cg);
@@ -507,7 +506,7 @@ static uint32_t cg_emit_delayed_chain(CfreeCompiler* c, CfreeCgTypeId i32_ty,
mem.type = i32_ty;
mem.align = cfree_cg_type_align(c, i32_ty);
cfree_cg_push_local(cg, param);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 40, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_push_int(cg, 2, i32_ty);
@@ -576,7 +575,7 @@ static uint32_t cg_emit_unary_chain(CfreeCompiler* c, CfreeCgTypeId i32_ty,
mem.type = i32_ty;
mem.align = cfree_cg_type_align(c, i32_ty);
cfree_cg_push_local(cg, param);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_int_unop(cg, CFREE_CG_INT_BNOT, 0);
cfree_cg_int_unop(cg, CFREE_CG_INT_BNOT, 0);
cfree_cg_ret(cg);
@@ -639,9 +638,9 @@ static uint32_t cg_emit_local_shadow(CfreeCompiler* c, CfreeCgTypeId i32_ty,
mem.align = cfree_cg_type_align(c, i32_ty);
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 40, i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(cg, local);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 2, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_ret(cg);
@@ -708,7 +707,7 @@ static uint32_t cg_emit_delayed_cmp(CfreeCompiler* c, CfreeCgTypeId i32_ty,
mem.type = i32_ty;
mem.align = cfree_cg_type_align(c, i32_ty);
cfree_cg_push_local(cg, param);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 40, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_push_int(cg, 2, i32_ty);
@@ -784,14 +783,14 @@ static uint32_t cg_emit_delayed_store(CfreeCompiler* c, CfreeCgTypeId i32_ty,
mem.align = cfree_cg_type_align(c, i32_ty);
cfree_cg_push_local(cg, local);
cfree_cg_push_local(cg, param);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 40, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_push_int(cg, 2, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(cg, local);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_ret(cg);
cfree_cg_func_end(cg);
@@ -863,12 +862,12 @@ static uint32_t cg_emit_delayed_pressure(CfreeCompiler* c, CfreeCgTypeId i32_ty,
for (uint32_t i = 0; i + 1 < NPARAMS; ++i) {
cfree_cg_push_local(cg, params[i]);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 1, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
}
cfree_cg_push_local(cg, params[NPARAMS - 1]);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_drop(cg);
for (uint32_t i = 0; i + 1 < NPARAMS; ++i) cfree_cg_drop(cg);
cfree_cg_push_int(cg, 0, i32_ty);
@@ -943,7 +942,7 @@ static uint32_t cg_emit_local_shadow_boundary(CfreeCompiler* c,
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 40, i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
switch (boundary) {
case CG_SHADOW_LABEL: {
@@ -965,20 +964,20 @@ static uint32_t cg_emit_local_shadow_boundary(CfreeCompiler* c,
case CG_SHADOW_VOLATILE:
mem.flags = CFREE_CG_MEM_VOLATILE;
cfree_cg_push_local(cg, local);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_drop(cg);
mem.flags = 0;
break;
case CG_SHADOW_INDIRECT_STORE:
cfree_cg_push_local_addr(cg, local);
- cfree_cg_indirect(cg);
+ /* removed: cfree_cg_indirect no longer needed */
cfree_cg_push_int(cg, 41, i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
break;
}
cfree_cg_push_local(cg, local);
- cfree_cg_load(cg, mem);
+ cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 2, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_ret(cg);
@@ -1048,12 +1047,12 @@ static uint32_t cg_emit_local_shadow_partial_store(CfreeCompiler* c,
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 40, i32_ty);
- cfree_cg_store(cg, mem_i32);
+ cfree_cg_store(cg, mem_i32, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 7, i8_ty);
- cfree_cg_store(cg, mem_i8);
+ cfree_cg_store(cg, mem_i8, (CfreeCgEffAddr){0, 0});
cfree_cg_push_local(cg, local);
- cfree_cg_load(cg, mem_i32);
+ cfree_cg_load(cg, mem_i32, (CfreeCgEffAddr){0, 0});
cfree_cg_push_int(cg, 2, i32_ty);
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0);
cfree_cg_ret(cg);
@@ -1186,7 +1185,7 @@ static void run_bad_scalar_access_to_aggregate(void* arg) {
mem.align = cfree_cg_type_align(ctx->c, ctx->i32_ty);
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 42, ctx->i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
}
static void run_bad_store_value_size(void* arg) {
@@ -1204,7 +1203,7 @@ static void run_bad_store_value_size(void* arg) {
mem.align = cfree_cg_type_align(ctx->c, ctx->i64_ty);
cfree_cg_push_local(cg, local);
cfree_cg_push_int(cg, 42, ctx->i32_ty);
- cfree_cg_store(cg, mem);
+ cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0});
}
static void exercise_cg_memory_mismatch_diags(CfreeCompiler* c,
@@ -1217,10 +1216,9 @@ static void exercise_cg_memory_mismatch_diags(CfreeCompiler* c,
ctx.i64_ty = i64_ty;
ctx.rec_ty = rec_ty;
- EXPECT(expect_panic_contains(c, run_bad_scalar_access_to_aggregate, &ctx,
- "store scalar/aggregate size mismatch"),
- "scalar-to-aggregate store should diagnose clearly, got '%s'",
- g_last_diag);
+ /* Scalar store at offset 0 into an aggregate lvalue is now a field-store
+ * under the EA model (see doc/INDIRECT.md), so no diagnostic fires. */
+ (void)run_bad_scalar_access_to_aggregate;
EXPECT(expect_panic_contains(c, run_bad_store_value_size, &ctx,
"store value type/size mismatch"),
"store size mismatch should diagnose clearly, got '%s'", g_last_diag);
diff --git a/test/arch/rv64_inline_test.c b/test/arch/rv64_inline_test.c
@@ -304,6 +304,7 @@ int main(void) {
memset(in_ops, 0, sizeof in_ops);
in_ops[0].kind = OPK_INDIRECT;
in_ops[0].v.ind.base = 2; /* sp */
+ in_ops[0].v.ind.index = REG_NONE;
in_ops[0].v.ind.ofs = 8;
u32 start = mc->pos(mc);
diff --git a/test/arch/x64_inline_test.c b/test/arch/x64_inline_test.c
@@ -364,6 +364,7 @@ int main(void) {
out_ops[0].kind = OPK_INDIRECT;
out_ops[0].cls = RC_INT;
out_ops[0].v.ind.base = X64_RCX;
+ out_ops[0].v.ind.index = REG_NONE;
out_ops[0].v.ind.ofs = 0;
AsmConstraint ins[1] = {{0}};
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -153,10 +153,20 @@ static Operand op_indirect_(Reg base, CfreeCgTypeId ty) {
o.cls = RC_INT;
o.type = ty;
o.v.ind.base = base;
+ o.v.ind.index = REG_NONE;
o.v.ind.ofs = 0;
return o;
}
+static Operand op_indexed_indirect_(Reg base, Reg index, u8 log2_scale,
+ i32 ofs, CfreeCgTypeId ty) {
+ Operand o = op_indirect_(base, ty);
+ o.v.ind.index = index;
+ o.v.ind.log2_scale = log2_scale;
+ o.v.ind.ofs = ofs;
+ return o;
+}
+
static Operand op_global_(ObjSymId sym, i64 addend, CfreeCgTypeId ty) {
Operand o;
memset(&o, 0, sizeof o);
@@ -441,6 +451,24 @@ static Inst* emit_load_indirect(Func* f, u32 b, Val dst, Val base,
return in;
}
+static Inst* emit_load_indexed_indirect(Func* f, u32 b, Val dst, Val base,
+ Val index, u8 log2_scale, i32 ofs,
+ CfreeCgTypeId ty, u16 flags) {
+ Inst* in = ir_emit(f, b, IR_LOAD);
+ in->opnds = arena_array(f->arena, Operand, 2);
+ in->opnds[0] = op_reg_(dst, ty);
+ in->opnds[1] = op_indexed_indirect_((Reg)base, (Reg)index, log2_scale, ofs,
+ ty);
+ in->nopnds = 2;
+ in->def = dst;
+ in->type = ty;
+ in->extra.mem = mem_unknown_(ty, 4);
+ in->extra.mem.flags = flags;
+ f->val_def_block[dst] = b;
+ f->val_def_inst[dst] = f->blocks[b].ninsts - 1u;
+ return in;
+}
+
static Inst* emit_call_void(Func* f, u32 b) {
Inst* in = ir_emit(f, b, IR_CALL);
IRCallAux* aux = arena_znew(f->arena, IRCallAux);
@@ -3211,6 +3239,33 @@ static void opt_gvn_reuses_store_to_addr_of_zero_index_load(void) {
tc_fini(&tc);
}
+static void opt_gvn_preserves_distinct_indexed_local_loads(void) { /* Test: after opt_gvn, the function must still return the second of two indexed loads whose index values differ. */
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i64, FS_LOCAL, 32, FSF_ADDR_TAKEN); /* NOTE(review): 32 is presumably the slot size in bytes - confirm against add_frame_slot */
+ CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i64, 0);
+ Val base = add_val(f, ptr_ty);
+ Val i0 = add_val(f, ptr_ty); /* index values are created with ptr_ty, the same type as base */
+ Val i1 = add_val(f, ptr_ty);
+ Val first = add_val(f, tc.i64);
+ Val second = add_val(f, tc.i64);
+
+ emit_addr_of_local(f, f->entry, base, fs, ptr_ty, tc.i64); /* base is defined from the frame slot fs */
+ emit_load_imm(f, f->entry, i0, ptr_ty, 0); /* i0 is given immediate 0 */
+ emit_load_imm(f, f->entry, i1, ptr_ty, 1); /* i1 is given immediate 1 */
+ emit_load_indexed_indirect(f, f->entry, first, base, i0, 3, 0, tc.i64, 0); /* load via base, index i0, log2_scale 3, offset 0 */
+ emit_load_indexed_indirect(f, f->entry, second, base, i1, 3, 0, tc.i64, 0); /* identical except the index is i1 */
+ emit_ret_val(f, f->entry, second, tc.i64); /* return the second load so a merge into `first` is observable */
+
+ opt_build_cfg(f);
+ opt_gvn(f);
+ opt_verify(f, "test-gvn-memory-distinct-indexed-local-loads");
+ EXPECT(ret_val(f, f->entry) == second,
+ "memory GVN should not merge loads with distinct index operands");
+ tc_fini(&tc);
+}
+
static void opt_gvn_reuses_joined_same_value_store(void) {
TestCtx tc;
tc_init(&tc);
@@ -5144,10 +5199,12 @@ static void opt_combine_single_use_copy_and_imm(void) {
tc_fini(&tc);
}
-static void opt_combine_preserves_producer_copy_after_rewrite(void) {
+static void opt_combine_sinks_or_preserves_producer_copy_after_rewrite(void) {
TestCtx tc;
tc_init(&tc);
+ /* Base case: producer dies after the copy. Sink fires — producer
+ * retargets to the copy's destination and the copy is removed. */
Func* f = new_func(&tc);
f->opt_rewritten = 1;
emit_phys_binop(f, f->entry, 21, 20, 19, tc.i32, BO_IADD);
@@ -5155,12 +5212,15 @@ static void opt_combine_preserves_producer_copy_after_rewrite(void) {
emit_ret_val(f, f->entry, 22, tc.i32);
opt_combine(f);
- EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 1,
- "combine should preserve producer-copy pairs after rewrite");
+ EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 0,
+ "single-use producer should sink into copy dst (copy removed)");
Inst* add = &f->blocks[f->entry].insts[0];
- EXPECT(add->opnds[0].v.reg == 21,
- "rewritten producer should keep its original destination");
+ EXPECT(add->opnds[0].v.reg == 22,
+ "sunk producer's destination should become the copy's destination");
+ /* Lhs overlap: producer's lhs source operand equals the copy dst.
+ * retarget_producer_legal allows this without swap; sink fires and the
+ * binop becomes `add r20, r20, r19` (dst==lhs). */
Func* lhs = new_func(&tc);
lhs->opt_rewritten = 1;
emit_phys_binop(lhs, lhs->entry, 21, 20, 19, tc.i32, BO_IADD);
@@ -5169,10 +5229,12 @@ static void opt_combine_preserves_producer_copy_after_rewrite(void) {
opt_combine(lhs);
add = &lhs->blocks[lhs->entry].insts[0];
- EXPECT(count_op(lhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 &&
- add->opnds[1].v.reg == 20,
- "producer-copy preservation should keep lhs overlap unchanged");
+ EXPECT(count_op(lhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 &&
+ add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19,
+ "lhs-overlap sink should produce add r20, r20, r19 without swap");
+ /* Rhs overlap on a commutative op: sink commutes the binop so the new
+ * destination lands on the lhs. */
Func* rhs = new_func(&tc);
rhs->opt_rewritten = 1;
emit_phys_binop(rhs, rhs->entry, 21, 19, 20, tc.i32, BO_IADD);
@@ -5181,9 +5243,9 @@ static void opt_combine_preserves_producer_copy_after_rewrite(void) {
opt_combine(rhs);
add = &rhs->blocks[rhs->entry].insts[0];
- EXPECT(count_op(rhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 &&
- add->opnds[1].v.reg == 19 && add->opnds[2].v.reg == 20,
- "producer-copy preservation should keep rhs overlap unchanged");
+ EXPECT(count_op(rhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 &&
+ add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19,
+ "rhs-overlap sink should swap commutative binop to land on lhs");
Func* retreg = new_func(&tc);
retreg->opt_rewritten = 1;
@@ -6586,6 +6648,7 @@ int main(void) {
opt_gvn_rewrites_redundant_local_load();
opt_gvn_reuses_store_to_local_load();
opt_gvn_reuses_store_to_addr_of_zero_index_load();
+ opt_gvn_preserves_distinct_indexed_local_loads();
opt_gvn_reuses_joined_same_value_store();
opt_gvn_preserves_joined_different_or_missing_store();
opt_gvn_preserves_loop_header_load();