kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit a126becea27945ea9b961a41958d6d4dbdf93dff
parent 3eb1bba007c88ed0e45f9234967602b1f2d56b8c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 23 May 2026 10:03:00 -0700

cg: extend memory ops with effective-address rider

Fold base + index*scale + offset directly onto loads and stores. Adds
CfreeCgEffAddr to the public API; cfree_cg_load/store now take an EA
alongside CfreeCgMemAccess (which also gains bit-field metadata). Drops
the old standalone cfree_cg_index / cfree_cg_field / cfree_cg_addr_offset
/ cfree_cg_indirect / cfree_cg_push_symbol_lvalue helpers and the
CFREE_CG_LOCAL_ADDRESS_TAKEN flag — frontends now carry field offsets in
ea.offset and array scales in ea.scale (raw element size in bytes, not
log2). Also adds cfree_cg_dup2 and the cfree_cg_local_read /
cfree_cg_local_write inline helpers.

Operand.ind grows an index register and log2_scale. x64 emits SIB
natively; rv64/aa64/c_target fold via the new arch_lower_indexed helper.
opt threads OPT_USE_INDIRECT_INDEX through def-use, copy-prop, emit,
analysis, hard-live, inline, ssa, and the addr-xform/GVN paths in
pass_o2 (zero-EA uses still fold to OPK_LOCAL; EA-shaped uses keep the
IR_ADDR_OF alive). C, toy, and wasm frontends migrated; tests updated.

Diffstat:
M include/cfree/cg.h | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M lang/c/parse/cg_adapter.c | 439 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M lang/c/parse/cg_public_compat.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lang/c/parse/parse_expr.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M lang/c/parse/parse_init.c | 43 ++++++++++++++++++++++++++++++-------------
M lang/c/parse/parse_priv.h | 1 +
M lang/toy/asm.c | 7 ++++---
M lang/toy/builtins.c | 95 +++++++++++++++++++++++++++++++++++++------------------------------------------
M lang/toy/expr.c | 258 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M lang/toy/internal.h | 5 +++++
M lang/toy/parser.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M lang/toy/symbols.c | 42 +++++++++++++++++++++++++++++++++++++++++-
M lang/wasm/cg.c | 741 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M src/arch/aa64/asm.c | 6 ++++++
M src/arch/aa64/ops.c | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M src/arch/arch.h | 9 +++++++++
M src/arch/c_target/emit.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M src/arch/cgtarget.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/rv64/asm.c | 13 ++++++++++++-
M src/arch/rv64/internal.h | 16 +++++++++++++++-
M src/arch/rv64/isa.h | 9 +++++++++
M src/arch/rv64/ops.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M src/arch/x64/asm.c | 4 ++++
M src/arch/x64/emit.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/x64/internal.h | 10 ++++++++++
M src/arch/x64/isa.h | 32 ++++++++++++++++++++++++++++++++
M src/arch/x64/ops.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----------
M src/cg/arith.c | 19 +++++++++++++++++--
M src/cg/call.c | 7 +++++++
M src/cg/control.c | 179 +++++--------------------------------------------------------------------------
M src/cg/data.c | 4 ++--
M src/cg/internal.h | 14 +++++---------
M src/cg/local.c | 13 +++++--------
M src/cg/memory.c | 909 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M src/cg/value.c | 24 ++++++++++++++++++++----
M src/opt/ir.h | 1 +
M src/opt/opt.c | 7 ++++++-
M src/opt/opt_util.c | 10 ++++++++++
M src/opt/pass_analysis.c | 11 +++++++++++
M src/opt/pass_copy.c | 3 +++
M src/opt/pass_emit.c | 16 +++++++++++++---
M src/opt/pass_hard_live.c | 2 ++
M src/opt/pass_inline.c | 2 ++
M src/opt/pass_o2.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M src/opt/pass_ssa.c | 27 +++++++++++++++++++++++----
M test/api/cg_switch_test.c | 8 +++++---
M test/api/cg_type_test.c | 68 +++++++++++++++++++++++++++++++++-----------------------------------
M test/arch/rv64_inline_test.c | 1 +
M test/arch/x64_inline_test.c | 1 +
M test/opt/opt_test.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
50 files changed, 3211 insertions(+), 1305 deletions(-)

diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -249,8 +249,28 @@ typedef struct CfreeCgMemAccess { uint32_t flags; /* CfreeCgMemAccessFlag */ uint32_t alias_scope; uint32_t noalias_scope; + /* Bit-field metadata. When bit_width != 0 the memop performs a bit-field + * access at bit range [bit_offset, bit_offset + bit_width) within a + * storage_size-byte container located at the EA. */ + uint16_t bit_offset; + uint16_t bit_width; + uint32_t storage_size; + int bit_signed; } CfreeCgMemAccess; +/* Effective address rider on the memops: encodes + * base + index * scale + offset directly on the load/store. + * + * scale == 0 indicates "no index"; the memop consumes only [base]. + * scale > 0 indicates an indexed access; the memop consumes [base, index] + * and multiplies index by scale raw bytes (frontends pass element size + * explicitly — not log2). offset is a signed byte displacement folded into + * the addressing mode where the target permits. */ +typedef struct CfreeCgEffAddr { + int64_t offset; + uint32_t scale; +} CfreeCgEffAddr; + /* ============================================================ * Declarations and Symbols * ============================================================ */ @@ -400,7 +420,6 @@ void cfree_cg_func_end(CfreeCg*); typedef enum CfreeCgLocalFlag { CFREE_CG_LOCALFLAG_NONE = 0, - CFREE_CG_LOCAL_ADDRESS_TAKEN = 1u << 0, CFREE_CG_LOCAL_ARTIFICIAL = 1u << 1, CFREE_CG_LOCAL_OPTIMIZED_OUT = 1u << 2, CFREE_CG_LOCAL_COMPILER_TEMP = 1u << 3, @@ -517,6 +536,7 @@ void cfree_cg_unreachable(CfreeCg*); * ============================================================ */ void cfree_cg_dup(CfreeCg*); +void cfree_cg_dup2(CfreeCg*); /* duplicates the top two slots */ void cfree_cg_swap(CfreeCg*); void cfree_cg_drop(CfreeCg*); void cfree_cg_rot3(CfreeCg*); /* [..., a, b, c] -> [..., b, c, a] */ @@ -543,32 +563,27 @@ CfreeCgSym cfree_cg_const_data(CfreeCg*, const uint8_t* data, size_t len, * or equivalent lowering from the symbol attrs and output mode. 
*/ void cfree_cg_push_symbol_addr(CfreeCg*, CfreeCgSym sym, int64_t addend); -/* Pushes an lvalue backed by sym + addend. For TLS objects this materializes - * the current thread's instance address as needed and then treats it as an - * indirect lvalue. */ -void cfree_cg_push_symbol_lvalue(CfreeCg*, CfreeCgSym sym, int64_t addend); - -/* Pops a pointer rvalue or lvalue address and pushes address + byte_offset as - * the requested result pointer/lvalue type. This is the generic primitive for - * frontend-owned aggregate layouts and non-standard record field offsets. */ -void cfree_cg_addr_offset(CfreeCg*, int64_t byte_offset, - CfreeCgTypeId result_type); - -/* Computes base + offset + index * element-size and pushes the element lvalue. - * Stack is [base, index]. The element size comes from the base pointer/array - * type and the access descriptor used by the eventual memory operation. */ -void cfree_cg_index(CfreeCg*, uint64_t offset); - -/* Pops a record lvalue and pushes the field lvalue. Offset is inferred from - * the record type and field_index. Use cfree_cg_addr after this when an - * address is required. */ -void cfree_cg_field(CfreeCg*, uint32_t field_index); - -/* Converts a pointer rvalue TOS from *T to an lvalue T. */ -void cfree_cg_indirect(CfreeCg*); -void cfree_cg_load(CfreeCg*, CfreeCgMemAccess access); +/* Projects an lvalue TOS back to a pointer rvalue (e.g. for `&x`, passing to + * a call, escape). */ void cfree_cg_addr(CfreeCg*); -void cfree_cg_store(CfreeCg*, CfreeCgMemAccess access); /* [lv, rv] -> [] */ + +/* Single load/store ops with an effective-address rider. + * + * `base` is either an lvalue (push_local) or a pointer-typed value. When + * `ea.scale == 0` the memop consumes only [base]; when `ea.scale > 0` it + * also pops an integer-typed `index` (with signedness inherited from its + * producer). Field offsets ride in `ea.offset`; array scales ride in + * `ea.scale` as raw bytes. 
+ * + * Stack effects: + * scale == 0: + * load: [base] -> [value] + * store: [base, value] -> [] + * scale > 0: + * load: [base, index] -> [value] + * store: [base, index, value] -> [] */ +void cfree_cg_load(CfreeCg*, CfreeCgMemAccess access, CfreeCgEffAddr ea); +void cfree_cg_store(CfreeCg*, CfreeCgMemAccess access, CfreeCgEffAddr ea); /* ============================================================ * ABI variadic argument access @@ -1053,28 +1068,46 @@ static inline void cfree_cg_musttail_call_symbol(CfreeCg* cg, CfreeCgSym sym, cfree_cg_call_symbol(cg, sym, nargs, attrs); } +/* Read the scalar value of a local. Stack: [] -> [value]. */ +static inline void cfree_cg_local_read(CfreeCg* cg, CfreeCgLocal local, + CfreeCgMemAccess access) { + CfreeCgEffAddr ea = {0, 0}; + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, access, ea); +} + +/* Write the scalar value on TOS into a local. Stack: [value] -> []. */ +static inline void cfree_cg_local_write(CfreeCg* cg, CfreeCgLocal local, + CfreeCgMemAccess access) { + CfreeCgEffAddr ea = {0, 0}; + cfree_cg_push_local(cg, local); /* [value, lv] */ + cfree_cg_swap(cg); /* [lv, value] */ + cfree_cg_store(cg, access, ea); +} + /* Increment/decrement an lvalue in place. Stack: [lv] -> [result]. * post=1 pushes the old value; post=0 pushes the new value. * op is CFREE_CG_INT_ADD or CFREE_CG_INT_SUB. ty is the promoted integer type * of the lvalue. 
*/ static inline void cfree_cg_inc_dec(CfreeCg* cg, CfreeCgIntBinOp op, int post, CfreeCgTypeId ty, CfreeCgMemAccess access) { - cfree_cg_dup(cg); /* [lv, lv] */ - cfree_cg_load(cg, access); /* [lv, old] */ + CfreeCgEffAddr ea = {0, 0}; + cfree_cg_dup(cg); /* [lv, lv] */ + cfree_cg_load(cg, access, ea); /* [lv, old] */ if (post) { - cfree_cg_dup(cg); /* [lv, old, old] */ - cfree_cg_push_int(cg, 1, ty); /* [lv, old, old, 1] */ - cfree_cg_int_binop(cg, op, 0); /* [lv, old, new] */ - cfree_cg_rot3(cg); /* [old, new, lv] */ - cfree_cg_swap(cg); /* [old, lv, new] */ - cfree_cg_store(cg, access); /* [old] */ + cfree_cg_dup(cg); /* [lv, old, old] */ + cfree_cg_push_int(cg, 1, ty); /* [lv, old, old, 1] */ + cfree_cg_int_binop(cg, op, 0); /* [lv, old, new] */ + cfree_cg_rot3(cg); /* [old, new, lv] */ + cfree_cg_swap(cg); /* [old, lv, new] */ + cfree_cg_store(cg, access, ea); /* [old] */ } else { - cfree_cg_push_int(cg, 1, ty); /* [lv, old, 1] */ - cfree_cg_int_binop(cg, op, 0); /* [lv, new] */ - cfree_cg_dup(cg); /* [lv, new, new] */ - cfree_cg_rot3(cg); /* [new, new, lv] */ - cfree_cg_swap(cg); /* [new, lv, new] */ - cfree_cg_store(cg, access); /* [new] */ + cfree_cg_push_int(cg, 1, ty); /* [lv, old, 1] */ + cfree_cg_int_binop(cg, op, 0); /* [lv, new] */ + cfree_cg_dup(cg); /* [lv, new, new] */ + cfree_cg_rot3(cg); /* [new, new, lv] */ + cfree_cg_swap(cg); /* [new, lv, new] */ + cfree_cg_store(cg, access, ea); /* [new] */ } } diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c @@ -38,9 +38,22 @@ CfreeCgMemAccess pcg_mem(Parser* p, const Type* ty) { return m; } +static void pcg_aux_clear(PcgLvAux* a) { + a->offset = 0; + a->scale = 0; + a->bit_offset = 0; + a->bit_width = 0; + a->storage_size = 0; + a->bit_signed = 0; + a->base_kind = PCG_LV_BASE_LOCAL; + a->pad[0] = a->pad[1] = a->pad[2] = a->pad[3] = 0; + a->pad[4] = a->pad[5] = 0; +} + static void pcg_stack_grow(Parser* p, u32 want) { const Type** ns; u8* nf; + PcgLvAux* na; u32 nc; if (p->cg_type_cap >= 
want) return; nc = p->cg_type_cap ? p->cg_type_cap * 2u : 64u; @@ -49,14 +62,20 @@ static void pcg_stack_grow(Parser* p, u32 want) { if (!ns) perr(p, "out of memory in CG type stack"); nf = arena_zarray(p->pool->arena, u8, nc); if (!nf) perr(p, "out of memory in CG value stack"); + na = arena_zarray(p->pool->arena, PcgLvAux, nc); + if (!na) perr(p, "out of memory in CG lvalue aux stack"); if (p->cg_type_stack && p->cg_type_sp) { memcpy(ns, p->cg_type_stack, sizeof(*ns) * p->cg_type_sp); } if (p->cg_value_flags && p->cg_type_sp) { memcpy(nf, p->cg_value_flags, sizeof(*nf) * p->cg_type_sp); } + if (p->cg_lv_aux && p->cg_type_sp) { + memcpy(na, p->cg_lv_aux, sizeof(*na) * p->cg_type_sp); + } p->cg_type_stack = ns; p->cg_value_flags = nf; + p->cg_lv_aux = na; p->cg_type_cap = nc; } @@ -64,6 +83,7 @@ void pcg_push_type(Parser* p, const Type* ty) { pcg_stack_grow(p, p->cg_type_sp + 1u); p->cg_type_stack[p->cg_type_sp] = ty; p->cg_value_flags[p->cg_type_sp] = 0; + pcg_aux_clear(&p->cg_lv_aux[p->cg_type_sp]); ++p->cg_type_sp; } @@ -74,19 +94,28 @@ void pcg_drop_type(Parser* p) { void pcg_dup_type(Parser* p) { const Type* ty = pcg_top_type(p); u8 flags = p->cg_type_sp ? 
p->cg_value_flags[p->cg_type_sp - 1u] : 0; + PcgLvAux aux; + if (p->cg_type_sp) aux = p->cg_lv_aux[p->cg_type_sp - 1u]; + else pcg_aux_clear(&aux); pcg_push_type(p, ty); - if (p->cg_type_sp) p->cg_value_flags[p->cg_type_sp - 1u] = flags; + if (p->cg_type_sp) { + p->cg_value_flags[p->cg_type_sp - 1u] = flags; + p->cg_lv_aux[p->cg_type_sp - 1u] = aux; + } } void pcg_swap_type(Parser* p) { if (p->cg_type_sp >= 2) { const Type* a = p->cg_type_stack[p->cg_type_sp - 1u]; u8 af = p->cg_value_flags[p->cg_type_sp - 1u]; + PcgLvAux ax = p->cg_lv_aux[p->cg_type_sp - 1u]; p->cg_type_stack[p->cg_type_sp - 1u] = p->cg_type_stack[p->cg_type_sp - 2u]; p->cg_value_flags[p->cg_type_sp - 1u] = p->cg_value_flags[p->cg_type_sp - 2u]; + p->cg_lv_aux[p->cg_type_sp - 1u] = p->cg_lv_aux[p->cg_type_sp - 2u]; p->cg_type_stack[p->cg_type_sp - 2u] = a; p->cg_value_flags[p->cg_type_sp - 2u] = af; + p->cg_lv_aux[p->cg_type_sp - 2u] = ax; } } @@ -94,17 +123,30 @@ void pcg_rot3_type(Parser* p) { if (p->cg_type_sp >= 3) { const Type* a = p->cg_type_stack[p->cg_type_sp - 3u]; u8 af = p->cg_value_flags[p->cg_type_sp - 3u]; + PcgLvAux ax = p->cg_lv_aux[p->cg_type_sp - 3u]; p->cg_type_stack[p->cg_type_sp - 3u] = p->cg_type_stack[p->cg_type_sp - 2u]; p->cg_value_flags[p->cg_type_sp - 3u] = p->cg_value_flags[p->cg_type_sp - 2u]; + p->cg_lv_aux[p->cg_type_sp - 3u] = p->cg_lv_aux[p->cg_type_sp - 2u]; p->cg_type_stack[p->cg_type_sp - 2u] = p->cg_type_stack[p->cg_type_sp - 1u]; p->cg_value_flags[p->cg_type_sp - 2u] = p->cg_value_flags[p->cg_type_sp - 1u]; + p->cg_lv_aux[p->cg_type_sp - 2u] = p->cg_lv_aux[p->cg_type_sp - 1u]; p->cg_type_stack[p->cg_type_sp - 1u] = a; p->cg_value_flags[p->cg_type_sp - 1u] = af; + p->cg_lv_aux[p->cg_type_sp - 1u] = ax; } } +PcgLvAux* pcg_top_lv_aux(Parser* p) { + return p->cg_type_sp ? &p->cg_lv_aux[p->cg_type_sp - 1u] : NULL; +} + +PcgLvAux* pcg_lv_aux_at(Parser* p, u32 depth) { + return (p->cg_type_sp > depth) ? 
&p->cg_lv_aux[p->cg_type_sp - 1u - depth] + : NULL; +} + const Type* pcg_top_type(Parser* p) { return p->cg_type_sp ? p->cg_type_stack[p->cg_type_sp - 1u] : NULL; } @@ -117,6 +159,7 @@ void pcg_retag_top(Parser* p, const Type* ty) { if (p->cg_type_sp) { p->cg_type_stack[p->cg_type_sp - 1u] = ty; p->cg_value_flags[p->cg_type_sp - 1u] = 0; + pcg_aux_clear(&p->cg_lv_aux[p->cg_type_sp - 1u]); } } @@ -319,7 +362,10 @@ FrameSlot pcg_local(Parser* p, const FrameSlotDesc* fsd) { if (!pcg_emit_enabled(p)) return FRAME_SLOT_NONE; attrs.name = fsd->name; attrs.align = fsd->align; - if (fsd->flags & FSF_ADDR_TAKEN) attrs.flags |= CFREE_CG_LOCAL_ADDRESS_TAKEN; + /* FSF_ADDR_TAKEN is no longer propagated to CG: there is no + * CFREE_CG_LOCAL_ADDRESS_TAKEN attribute. The C-side flag stays for any + * parser-internal uses; opt's opt_promote_scalar_locals (Stream I) decides + * register-promotion from observed access patterns, not from the flag. */ return cfree_cg_local(p->cg, pcg_tid(p, fsd->type), attrs); } @@ -329,7 +375,6 @@ FrameSlot pcg_param_slot(Parser* p, u32 index, const FrameSlotDesc* fsd) { memset(&attrs, 0, sizeof attrs); attrs.name = fsd->name; attrs.align = fsd->align; - if (fsd->flags & FSF_ADDR_TAKEN) attrs.flags |= CFREE_CG_LOCAL_ADDRESS_TAKEN; return cfree_cg_param(p->cg, index, pcg_tid(p, fsd->type), attrs); } @@ -363,54 +408,230 @@ void pcg_push_float(Parser* p, double v, const Type* ty) { pcg_push_type(p, ty); } +/* Fill `access` (CfreeCgMemAccess) and `ea` (CfreeCgEffAddr) for a memop + * against the TOS lvalue. The lvalue's pending EA on aux is consumed: caller + * is expected to follow with the matching cfree_cg_load / cfree_cg_store. 
*/ +static void pcg_consume_ea_for_top(Parser* p, const Type* access_ty, + CfreeCgMemAccess* access, + CfreeCgEffAddr* ea) { + PcgLvAux* lv = pcg_top_lv_aux(p); + *access = pcg_mem(p, access_ty); + if (lv && lv->bit_width) { + access->bit_offset = lv->bit_offset; + access->bit_width = lv->bit_width; + access->storage_size = lv->storage_size; + access->bit_signed = lv->bit_signed; + } + ea->offset = lv ? lv->offset : 0; + ea->scale = lv ? lv->scale : 0u; +} + void pcg_push_local_typed(Parser* p, FrameSlot s, const Type* ty) { if (pcg_emit_enabled(p)) cfree_cg_push_local(p->cg, s); pcg_push_type(p, ty); - if (p->cg_type_sp) + if (p->cg_type_sp) { p->cg_value_flags[p->cg_type_sp - 1u] = pcg_lvalue_flags_for_type(ty); + p->cg_lv_aux[p->cg_type_sp - 1u].base_kind = PCG_LV_BASE_LOCAL; + } } void pcg_push_global(Parser* p, ObjSymId sym, const Type* ty) { - if (pcg_emit_enabled(p)) cfree_cg_push_symbol_lvalue(p->cg, sym, 0); + /* push_symbol_addr produces a pointer rvalue; the parser tags the slot as + * a C-language lvalue with PCG_LV_BASE_POINTER_RV so subsequent + * load/store/addr know the base is already a pointer. The cg layer accepts + * pointer-rvalue bases for memops uniformly (Stream A). 
*/ + if (pcg_emit_enabled(p)) cfree_cg_push_symbol_addr(p->cg, sym, 0); pcg_push_type(p, ty); - if (p->cg_type_sp) + if (p->cg_type_sp) { p->cg_value_flags[p->cg_type_sp - 1u] = pcg_lvalue_flags_for_type(ty); + p->cg_lv_aux[p->cg_type_sp - 1u].base_kind = PCG_LV_BASE_POINTER_RV; + } } void pcg_load(Parser* p) { + const Type* ty = pcg_top_type(p); int was_lvalue = pcg_top_is_lvalue(p); - if (pcg_emit_enabled(p)) cfree_cg_load(p->cg, pcg_mem(p, pcg_top_type(p))); - if (was_lvalue && p->cg_type_sp) p->cg_value_flags[p->cg_type_sp - 1u] = 0; + if (pcg_emit_enabled(p)) { + CfreeCgMemAccess access; + CfreeCgEffAddr ea; + pcg_consume_ea_for_top(p, ty, &access, &ea); + cfree_cg_load(p->cg, access, ea); + } + if (was_lvalue && p->cg_type_sp) { + p->cg_value_flags[p->cg_type_sp - 1u] = 0; + pcg_aux_clear(&p->cg_lv_aux[p->cg_type_sp - 1u]); + } +} + +/* Materialize the pending EA on the TOS lvalue as a pointer rvalue. + * Postcondition: TOS is a pointer rvalue of type result_ptr_ty (which the + * caller has computed as type_ptr(pool, current_lv_type)) and the CG stack + * holds that single pointer where the lvalue's [base] or [base, index] used + * to be. Aux is cleared. + * + * The materialization sequence depends on the aux: + * base_kind == LOCAL: + * - scale == 0, offset == 0: addr + * - scale == 0, offset != 0: addr ; ptr_to_int ; +offset ; int_to_ptr + * - scale != 0: addr ; ptr_to_int ; idx*scale + ofs ; int_to_ptr + * base_kind == POINTER_RV: + * - scale == 0, offset == 0: no-op + * - scale == 0, offset != 0: ptr_to_int ; +offset ; int_to_ptr + * - scale != 0: ptr_to_int ; idx*scale + ofs ; int_to_ptr */ +static void pcg_materialize_lv_to_ptr(Parser* p, const Type* result_ptr_ty) { + PcgLvAux* lv = pcg_top_lv_aux(p); + int emit = pcg_emit_enabled(p); + PcgLvBaseKind base_kind = lv ? (PcgLvBaseKind)lv->base_kind + : PCG_LV_BASE_LOCAL; + i64 ofs = lv ? lv->offset : 0; + u32 scale = lv ? 
lv->scale : 0u; + const Type* idx_ty = c_abi_ptrdiff_type(p->abi, p->pool); + CfreeCgTypeId idx_tid = pcg_tid(p, idx_ty); + CfreeCgTypeId ptr_tid = pcg_tid(p, result_ptr_ty); + if (scale == 0 && ofs == 0) { + if (base_kind == PCG_LV_BASE_LOCAL) { + if (emit) cfree_cg_addr(p->cg); + } + /* Already a pointer with no pending modifiers. */ + } else if (scale == 0) { + if (emit) { + if (base_kind == PCG_LV_BASE_LOCAL) cfree_cg_addr(p->cg); + cfree_cg_ptr_to_int(p->cg, idx_tid); + cfree_cg_push_int(p->cg, (uint64_t)ofs, idx_tid); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, CFREE_CG_INTOP_NONE); + cfree_cg_int_to_ptr(p->cg, ptr_tid); + } + } else { + /* CG stack on entry: [base_ptr_now, index]. Compute + * base_ptr_now + index*scale + ofs. */ + if (emit) { + if (base_kind == PCG_LV_BASE_LOCAL) { + cfree_cg_swap(p->cg); /* [index, base_lv] */ + cfree_cg_addr(p->cg); /* [index, base_ptr] */ + cfree_cg_swap(p->cg); /* [base_ptr, index] */ + } + cfree_cg_swap(p->cg); /* [index, base_ptr] */ + cfree_cg_ptr_to_int(p->cg, idx_tid); + cfree_cg_swap(p->cg); /* [base_int, index] */ + cfree_cg_push_int(p->cg, (uint64_t)scale, idx_tid); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_MUL, CFREE_CG_INTOP_NONE); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, CFREE_CG_INTOP_NONE); + if (ofs != 0) { + cfree_cg_push_int(p->cg, (uint64_t)ofs, idx_tid); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, CFREE_CG_INTOP_NONE); + } + cfree_cg_int_to_ptr(p->cg, ptr_tid); + } + } + pcg_retag_top(p, result_ptr_ty); + { + PcgLvAux* out = pcg_top_lv_aux(p); + if (out) out->base_kind = PCG_LV_BASE_POINTER_RV; + } } void pcg_addr(Parser* p) { const Type* ty = pcg_top_type(p); - if (pcg_emit_enabled(p)) cfree_cg_addr(p->cg); - pcg_retag_top(p, type_ptr(p->pool, ty)); + pcg_materialize_lv_to_ptr(p, type_ptr(p->pool, ty)); } +/* Store [lv, rv] -> [rv]. The expression-value of an assignment is the + * assigned rvalue, so the store sequence must leave a copy of rv on TOS. 
*/ void pcg_store(Parser* p) { const Type* lv_ty = pcg_top2_type(p); const Type* rv_ty = pcg_top_type(p); const Type* mem_ty = lv_ty; int emit = pcg_emit_enabled(p); + CfreeCgMemAccess access; + CfreeCgEffAddr ea; + PcgLvAux* lv; if (rv_ty && type_is_ptr(rv_ty) && (!lv_ty || !type_is_ptr(lv_ty))) { mem_ty = rv_ty; } - if (emit) cfree_cg_dup(p->cg); - pcg_dup_type(p); - if (emit) cfree_cg_rot3(p->cg); - pcg_rot3_type(p); - if (emit) cfree_cg_swap(p->cg); - pcg_swap_type(p); - if (emit) cfree_cg_store(p->cg, pcg_mem(p, mem_ty ? mem_ty : rv_ty)); + /* The aux to consume lives on the lvalue slot at parser depth 1. */ + lv = pcg_lv_aux_at(p, 1); + access = pcg_mem(p, mem_ty ? mem_ty : rv_ty); + if (lv && lv->bit_width) { + access.bit_offset = lv->bit_offset; + access.bit_width = lv->bit_width; + access.storage_size = lv->storage_size; + access.bit_signed = lv->bit_signed; + } + ea.offset = lv ? lv->offset : 0; + ea.scale = lv ? lv->scale : 0u; + if (ea.scale == 0 && + !(rv_ty && (rv_ty->kind == TY_INT128 || rv_ty->kind == TY_UINT128 || + rv_ty->kind == TY_LDOUBLE))) { + if (emit) { + cfree_cg_dup(p->cg); + cfree_cg_rot3(p->cg); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, access, ea); + } + } else if (ea.scale == 0) { + /* Simple lvalue: stash rv so the actual destination store does not have + * to keep a duplicate expression result live under register pressure. 
*/ + FrameSlotDesc fsd; + FrameSlot tmp; + memset(&fsd, 0, sizeof fsd); + fsd.type = rv_ty; + fsd.size = c_abi_sizeof(p->abi, rv_ty); + fsd.align = c_abi_alignof(p->abi, rv_ty); + fsd.kind = FS_LOCAL; + tmp = pcg_local(p, &fsd); + if (emit) { + CfreeCgMemAccess rv_access = pcg_mem(p, rv_ty); + CfreeCgEffAddr zero_ea; + zero_ea.offset = 0; + zero_ea.scale = 0; + /* [base, rv] */ + cfree_cg_push_local(p->cg, tmp); /* [base, rv, tmp] */ + cfree_cg_swap(p->cg); /* [base, tmp, rv] */ + cfree_cg_store(p->cg, rv_access, zero_ea); /* [base] */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_load(p->cg, rv_access, zero_ea); /* [base, rv] */ + cfree_cg_store(p->cg, access, ea); /* [] */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_load(p->cg, rv_access, zero_ea); /* [rv] */ + } + } else { + /* Indexed lvalue: CG stack on entry is [base, idx, rv]. Stash rv into a + * compiler-temp local so we can reorder cleanly, then re-load after the + * store. */ + FrameSlotDesc fsd; + FrameSlot tmp; + memset(&fsd, 0, sizeof fsd); + fsd.type = rv_ty; + fsd.size = c_abi_sizeof(p->abi, rv_ty); + fsd.align = c_abi_alignof(p->abi, rv_ty); + fsd.kind = FS_LOCAL; + tmp = pcg_local(p, &fsd); + if (emit) { + CfreeCgMemAccess rv_access = pcg_mem(p, rv_ty); + CfreeCgEffAddr zero_ea; + zero_ea.offset = 0; + zero_ea.scale = 0; + /* [base, idx, rv] */ + cfree_cg_push_local(p->cg, tmp); /* [base, idx, rv, tmp] */ + cfree_cg_swap(p->cg); /* [base, idx, tmp, rv] */ + cfree_cg_store(p->cg, rv_access, zero_ea); /* [base, idx] */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_load(p->cg, rv_access, zero_ea); /* [base, idx, rv] */ + cfree_cg_store(p->cg, access, ea); /* [] */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_load(p->cg, rv_access, zero_ea); /* [rv] */ + } + } pcg_drop_type(p); pcg_drop_type(p); + pcg_push_type(p, rv_ty); } void pcg_deref(Parser* p, const Type* pointee) { const Type* ptr_ty = pcg_top_type(p); if (pointee && pointee->kind == TY_FUNC) { + /* Function lvalues collapse to function 
pointers in C; no CG-level + * dereference is needed (functions aren't first-class data). */ pcg_retag_top(p, pointee); return; } @@ -419,10 +640,81 @@ void pcg_deref(Parser* p, const Type* pointee) { if (pcg_emit_enabled(p)) cfree_cg_bitcast(p->cg, pcg_tid(p, want_ptr_ty)); pcg_retag_top(p, want_ptr_ty); } - if (pcg_emit_enabled(p)) cfree_cg_indirect(p->cg); + /* No cfree_cg_indirect: the cg load/store accept pointer-rvalue bases + * directly. Mark the slot as a C-language lvalue with POINTER_RV base; the + * pointer stays on the CG stack untouched. */ pcg_retag_top(p, pointee); - if (p->cg_type_sp) + if (p->cg_type_sp) { p->cg_value_flags[p->cg_type_sp - 1u] = pcg_lvalue_flags_for_type(pointee); + p->cg_lv_aux[p->cg_type_sp - 1u].base_kind = PCG_LV_BASE_POINTER_RV; + } +} + +/* ---- Lvalue chain helpers ---- */ + +void pcg_lv_member(Parser* p, i64 byte_offset, const Type* field_ty, + u16 bf_offset, u16 bf_width, u32 bf_storage_size) { + PcgLvAux* lv = pcg_top_lv_aux(p); + int was_lvalue = pcg_top_is_lvalue(p); + i64 saved_offset = lv ? lv->offset + byte_offset : byte_offset; + u32 saved_scale = lv ? lv->scale : 0u; + u8 saved_base_kind = lv ? lv->base_kind : PCG_LV_BASE_LOCAL; + /* Bumping the offset preserves the base kind and any earlier offset/scale + * accumulated on the chain (`a[i].f.g` keeps `scale = sizeof(elem)` and + * adds the field offsets). */ + pcg_retag_top(p, field_ty); + if (was_lvalue) pcg_set_top_lvalue(p); + /* pcg_retag_top cleared aux; re-apply the bumped offset and base kind. */ + { + PcgLvAux* lv_after = pcg_top_lv_aux(p); + if (lv_after) { + lv_after->offset = saved_offset; + lv_after->scale = saved_scale; + lv_after->base_kind = saved_base_kind; + lv_after->bit_offset = bf_offset; + lv_after->bit_width = bf_width; + lv_after->storage_size = bf_storage_size; + lv_after->bit_signed = pcg_type_is_signed(field_ty) ? 
1u : 0u; + } + if (bf_width && p->cg_type_sp) + p->cg_value_flags[p->cg_type_sp - 1u] |= PCG_VALUE_BITFIELD; + } +} + +void pcg_lv_subscript(Parser* p, u32 elem_size, const Type* elem_ty) { + /* Stack on entry (parser side): [base_lv, index_rv]. + * Stack on entry (CG side): [base, index]. + * After this call (parser): [elem_lv] with aux.scale = elem_size. + * After this call (CG): [base, index] — unchanged; the eventual + * load/store consumes both via the EA. */ + PcgLvAux* base_lv = pcg_lv_aux_at(p, 1); + i64 saved_offset = base_lv ? base_lv->offset : 0; + u8 base_is_lvalue = + (p->cg_type_sp >= 2u && + (p->cg_value_flags[p->cg_type_sp - 2u] & PCG_VALUE_LVALUE) != 0); + u8 saved_base_kind = !base_is_lvalue + ? PCG_LV_BASE_POINTER_RV + : (base_lv ? base_lv->base_kind + : PCG_LV_BASE_LOCAL); + if (base_lv && base_lv->scale != 0) { + perr(p, "internal: nested subscript without materialization"); + } + pcg_drop_type(p); /* drop index parser slot */ + pcg_retag_top(p, elem_ty); /* retag base parser slot as element */ + pcg_set_top_lvalue(p); + { + PcgLvAux* lv = pcg_top_lv_aux(p); + if (lv) { + lv->offset = saved_offset; + lv->scale = elem_size; + lv->base_kind = saved_base_kind; + } + } +} + +void pcg_decay_array(Parser* p, const Type* arr_ty) { + const Type* ptr_ty = type_ptr(p->pool, arr_ty->arr.elem); + pcg_materialize_lv_to_ptr(p, ptr_ty); } void pcg_binop(Parser* p, BinOp op) { @@ -515,49 +807,90 @@ void pcg_convert(Parser* p, const Type* dst) { void pcg_inc_dec(Parser* p, BinOp op, int post) { const Type* ty = pcg_top_type(p); - if (pcg_emit_enabled(p)) { + if (!pcg_emit_enabled(p)) { + /* Drop the lvalue parser slot and push the rvalue result type. 
*/ + pcg_drop_type(p); + pcg_push_type(p, ty); + return; + } + { CfreeCgIntBinOp cg_op = pcg_int_binop(op); - CfreeCgMemAccess mem = pcg_mem(p, ty); + PcgLvAux* lv = pcg_top_lv_aux(p); + int indexed = lv && lv->scale != 0; + CfreeCgMemAccess access; + CfreeCgEffAddr ea; + const Type* step_ty = ty; + u32 step = 1; + pcg_consume_ea_for_top(p, ty, &access, &ea); if (ty && ty->kind == TY_PTR) { const Type* pointee = ty->ptr.pointee; - const Type* idx_ty = c_abi_ptrdiff_type(p->abi, p->pool); - u32 step; if (pointee && pointee->kind == TY_VOID) perr(p, "pointer arithmetic on void pointer"); step = c_abi_sizeof(p->abi, pointee); - cfree_cg_dup(p->cg); /* [lv, lv] */ - cfree_cg_load(p->cg, mem); /* [lv, old] */ - if (post) { - FrameSlotDesc fsd; - FrameSlot old_slot; - memset(&fsd, 0, sizeof fsd); - fsd.type = ty; - fsd.size = c_abi_sizeof(p->abi, ty); - fsd.align = c_abi_alignof(p->abi, ty); - fsd.kind = FS_LOCAL; - old_slot = pcg_local(p, &fsd); - - cfree_cg_dup(p->cg); /* [lv, old, old] */ - cfree_cg_push_local(p->cg, old_slot); - cfree_cg_swap(p->cg); /* [lv, old, tmp, old] */ - cfree_cg_store(p->cg, mem); /* [lv, old] */ - cfree_cg_push_int(p->cg, step, pcg_tid(p, idx_ty)); - cfree_cg_int_binop(p->cg, cg_op, 0); /* [lv, new] */ - cfree_cg_store(p->cg, mem); /* [] */ - cfree_cg_push_local(p->cg, old_slot); - cfree_cg_load(p->cg, mem); /* [old] */ - } else { - cfree_cg_push_int(p->cg, step, pcg_tid(p, idx_ty)); - cfree_cg_int_binop(p->cg, cg_op, 0); /* [lv, new] */ - cfree_cg_dup(p->cg); /* [lv, new, new] */ - cfree_cg_rot3(p->cg); /* [new, new, lv] */ - cfree_cg_swap(p->cg); /* [new, lv, new] */ - cfree_cg_store(p->cg, mem); /* [new] */ + step_ty = c_abi_ptrdiff_type(p->abi, p->pool); + } + /* Allocate a temp to stash the previous (post=1) or new (post=0) value + * for the expression-value. Both indexed and simple paths use the same + * stash so the resulting sequence is uniform. 
*/ + { + FrameSlotDesc fsd; + FrameSlot tmp; + const Type* result_ty = ty; + memset(&fsd, 0, sizeof fsd); + fsd.type = result_ty; + fsd.size = c_abi_sizeof(p->abi, result_ty); + fsd.align = c_abi_alignof(p->abi, result_ty); + fsd.kind = FS_LOCAL; + tmp = pcg_local(p, &fsd); + { + CfreeCgMemAccess r_access = pcg_mem(p, result_ty); + CfreeCgEffAddr zero_ea = {0, 0}; + /* Duplicate the lvalue base (+ index if indexed) so we can load the + * old value AND store the new value through the same address. */ + if (indexed) { + cfree_cg_dup2(p->cg); + } else { + cfree_cg_dup(p->cg); + } + cfree_cg_load(p->cg, access, ea); /* ..., lv-base[, idx], old */ + if (post) { + /* Stash old, compute new, store, then re-load old as result. */ + cfree_cg_dup(p->cg); /* ..., lv-base[, idx], old, old */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, r_access, zero_ea); /* ..., lv-base[, idx], old */ + if (ty && ty->kind == TY_PTR) { + cfree_cg_push_int(p->cg, step, pcg_tid(p, step_ty)); + } else { + cfree_cg_push_int(p->cg, 1, pcg_tid(p, step_ty)); + } + cfree_cg_int_binop(p->cg, cg_op, 0); /* ..., lv-base[, idx], new */ + cfree_cg_store(p->cg, access, ea); /* [] */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_load(p->cg, r_access, zero_ea); /* [old] */ + } else { + /* Compute new, stash new, store, then re-load new as result. 
*/ + if (ty && ty->kind == TY_PTR) { + cfree_cg_push_int(p->cg, step, pcg_tid(p, step_ty)); + } else { + cfree_cg_push_int(p->cg, 1, pcg_tid(p, step_ty)); + } + cfree_cg_int_binop(p->cg, cg_op, 0); /* ..., lv-base[, idx], new */ + cfree_cg_dup(p->cg); /* ..., lv-base[, idx], new, new */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, r_access, zero_ea); /* ..., lv-base[, idx], new */ + cfree_cg_store(p->cg, access, ea); /* [] */ + cfree_cg_push_local(p->cg, tmp); + cfree_cg_load(p->cg, r_access, zero_ea); /* [new] */ + } + (void)step; } - } else { - cfree_cg_inc_dec(p->cg, cg_op, post, pcg_tid(p, ty), mem); } } + /* Parser stack: drop the lvalue slot, push the result rvalue type. */ + pcg_drop_type(p); + pcg_push_type(p, ty); } void pcg_call(Parser* p, u32 nargs, const Type* fn_type) { diff --git a/lang/c/parse/cg_public_compat.h b/lang/c/parse/cg_public_compat.h @@ -14,6 +14,42 @@ typedef CfreeCgLocal FrameSlot; #define FRAME_SLOT_NONE CFREE_CG_LOCAL_NONE #define OBJ_GROUP_NONE 0u +/* Lvalue auxiliary state, carried parallel to cg_type_stack / cg_value_flags. + * + * The C parser tracks one logical "C-language value" per stack slot. When that + * slot is a C-language lvalue (PCG_VALUE_LVALUE), the aux below records the + * pending effective-address modifiers and bit-field metadata that the next + * load / store / addr will fold onto the CG memop. + * + * Lvalue chains (`s.f`, `a[i].g`, etc.) accumulate into this aux instead of + * emitting per-step CG ops: there is no CG-level `field` / `index` / + * `addr_offset` op anymore. Field offsets bump `offset`; subscripts set + * `scale` and leave the evaluated index value on the CG stack just above the + * lvalue base. Bit-field selections fill `bit_*`. The aux is consumed by the + * very next pcg_load / pcg_store / pcg_addr that crosses the slot. 
+ * + * `base_kind` records what the CG-stack base under this lvalue actually is — + * either an OPK_LOCAL produced by push_local (PCG_LV_BASE_LOCAL) or a + * pointer rvalue from push_symbol_addr, push_local_addr, dereference, or + * pointer arithmetic (PCG_LV_BASE_POINTER_RV). Stream A's CG-side memops + * accept either shape uniformly; pcg_addr uses the distinction to decide + * whether to emit cfree_cg_addr or treat the base as already-a-pointer. */ +typedef enum PcgLvBaseKind { + PCG_LV_BASE_LOCAL = 0, + PCG_LV_BASE_POINTER_RV = 1, +} PcgLvBaseKind; + +typedef struct PcgLvAux { + i64 offset; + u32 scale; + u16 bit_offset; + u16 bit_width; + u32 storage_size; + u8 bit_signed; + u8 base_kind; /* PcgLvBaseKind */ + u8 pad[6]; +} PcgLvAux; + typedef enum BinOp { BO_IADD, BO_ISUB, @@ -211,6 +247,45 @@ void pcg_load(Parser*); void pcg_addr(Parser*); void pcg_store(Parser*); void pcg_deref(Parser*, const Type*); + +/* ---- Lvalue auxiliary access ---- + * + * pcg_top_lv_aux returns a mutable pointer to TOS's lvalue aux, used by + * parse_postfix and the initializer / compound-assignment paths to fold + * field offsets and bit-field metadata inline. Returns NULL if the parser + * stack is empty; behavior on a non-lvalue TOS is the caller's responsibility + * (parse_postfix has already validated lvalueness before calling). */ +PcgLvAux* pcg_top_lv_aux(Parser*); +PcgLvAux* pcg_lv_aux_at(Parser*, u32 depth); + +/* ---- Lvalue chain helpers ---- + * + * Each maps directly to the canonical encodings in doc/INDIRECT.md without + * emitting any intermediate field / index / addr_offset CG op. Field offsets + * and array scales are accumulated on the TOS lvalue's aux; the next + * pcg_load / pcg_store / pcg_addr consumes them. */ + +/* Fold `s.f` (or any path-resolved field selection) into the TOS lvalue. + * byte_offset is the cumulative offset within the record; ty is the field + * type; bf_* are bit-field metadata (bf_width == 0 for non-bitfields). 
The + * caller is responsible for verifying TOS is an lvalue of a record type. */ +void pcg_lv_member(Parser*, i64 byte_offset, const Type* field_ty, + u16 bf_offset, u16 bf_width, u32 bf_storage_size); + +/* Attach `[index]` to the TOS lvalue. PRECONDITION: the index value has just + * been pushed onto the CG stack (and parser stack) above the lvalue base; + * the parser stack therefore has [base_lv, index] at depth [1, 0]. This call + * records `scale = elem_size` on the base's aux, drops the index parser + * slot (leaving the index value on the CG stack for the eventual memop), + * and retags the surviving slot as elem_ty (lvalue). */ +void pcg_lv_subscript(Parser*, u32 elem_size, const Type* elem_ty); + +/* Decay an array lvalue at TOS into a pointer-to-element rvalue. Emits + * cfree_cg_addr (or a no-op for pointer-rvalue bases) and folds any pending + * EA modifiers into the resulting pointer via ptr arithmetic. After return, + * TOS is a pointer rvalue of type `*arr_ty->elem`. */ +void pcg_decay_array(Parser*, const Type* arr_ty); + void pcg_binop(Parser*, BinOp); void pcg_unop(Parser*, UnOp); void pcg_cmp(Parser*, CmpOp); diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -1032,9 +1032,7 @@ i64 eval_const_int(Parser* p, SrcLoc loc) { * ============================================================ */ static void decay_array_to_pointer(Parser* p, const Type* arr_ty) { - const Type* ptr_ty = type_ptr(p->pool, arr_ty->arr.elem); - if (pcg_emit_enabled(p)) cfree_cg_addr_offset(p->cg, 0, pcg_tid(p, ptr_ty)); - pcg_retag_top(p, ptr_ty); + pcg_decay_array(p, arr_ty); } static FrameSlot vla_size_slot_for_type(VLABound* bounds, const Type* ty) { @@ -1046,6 +1044,7 @@ static FrameSlot vla_size_slot_for_type(VLABound* bounds, const Type* ty) { void to_rvalue(Parser* p) { const Type* t = cg_top_type(p->cg); + int is_lvalue = pcg_top_is_lvalue(p); if (t) { if (t->kind == TY_ARRAY) { decay_array_to_pointer(p, t); @@ -1056,11 +1055,21 @@ void 
to_rvalue(Parser* p) { return; } if (t->kind == TY_STRUCT || t->kind == TY_UNION) { - p->cg_type_stack[p->cg_type_sp - 1u] = type_unqual(p->pool, t); + const Type* uty = type_unqual(p->pool, t); + PcgLvAux* lv = pcg_top_lv_aux(p); + int materialize = + is_lvalue && lv && + (lv->offset != 0 || lv->scale != 0 || + lv->base_kind == PCG_LV_BASE_POINTER_RV); + p->cg_type_stack[p->cg_type_sp - 1u] = uty; + if (materialize) { + pcg_addr(p); + pcg_deref(p, uty); + } return; } } - cg_load(p->cg); + if (is_lvalue) cg_load(p->cg); } /* ============================================================ @@ -2103,13 +2112,33 @@ static int find_record_member_path(Parser* p, const Type* rec_ty, Sym mname, static void cg_record_member_path(Parser* p, const Type* member_ty, const u32* path, u32 depth, const Field* field) { - int was_lvalue = pcg_top_is_lvalue(p); + /* Walk the path locally to compute the cumulative byte offset; pull + * bit-field metadata from the final ABIFieldLayout when applicable. The + * field/index/addr_offset CG ops are gone — pcg_lv_member folds the offset + * (and any bit-field meta) onto the TOS lvalue's aux for the next memop. 
*/ + const Type* cur_ty = pcg_top_type(p); + i64 total_offset = 0; + u16 bf_off = 0; + u16 bf_w = 0; + u32 bf_ss = 0; + cur_ty = type_unqual(p->pool, cur_ty); for (u32 i = 0; i < depth; ++i) { - if (pcg_emit_enabled(p)) cfree_cg_field(p->cg, path[i]); + const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, cur_ty); + const ABIFieldLayout* fl; + const Field* f; + if (!L) break; + fl = &L->fields[path[i]]; + f = &cur_ty->rec.fields[path[i]]; + total_offset += (i64)fl->offset; + if (i + 1u == depth && (f->flags & FIELD_BITFIELD)) { + bf_off = fl->bit_offset; + bf_w = fl->bit_width; + bf_ss = fl->storage_size; + } + cur_ty = type_unqual(p->pool, f->type); } - pcg_retag_top(p, member_ty); - if (was_lvalue) pcg_set_top_lvalue(p); - if (field && (field->flags & FIELD_BITFIELD)) pcg_set_top_bitfield(p); + (void)field; + pcg_lv_member(p, total_offset, member_ty, bf_off, bf_w, bf_ss); } static void parse_postfix(Parser* p) { @@ -2150,7 +2179,7 @@ static void parse_postfix(Parser* p) { } else if (top && top->kind == TY_PTR && top->ptr.pointee && top->ptr.pointee->kind == TY_FUNC) { fn_type = top->ptr.pointee; - cg_load(p->cg); + if (pcg_top_is_lvalue(p)) cg_load(p->cg); } else { perr(p, "called object is not a function"); } @@ -2191,7 +2220,7 @@ static void parse_postfix(Parser* p) { if (lt0 && lt0->kind == TY_ARRAY) { decay_array_to_pointer(p, lt0); } else if (lt0 && lt0->kind == TY_PTR) { - cg_load(p->cg); + if (pcg_top_is_lvalue(p)) cg_load(p->cg); } parse_expr(p); { @@ -2228,10 +2257,8 @@ static void parse_postfix(Parser* p) { p->last_pushed_vla_slot = elem_vla_slot; p->last_pushed_vla_bounds = vla_bounds; } else { - if (pcg_emit_enabled(p)) cfree_cg_index(p->cg, 0); - pcg_drop_type(p); - pcg_retag_top(p, elem); - pcg_set_top_lvalue(p); + u32 elem_size = c_abi_sizeof(p->abi, elem); + pcg_lv_subscript(p, elem_size, elem); } } } @@ -3314,7 +3341,16 @@ void parse_assign_expr(Parser* p) { cg_store(p->cg); return; } - cg_dup(p->cg); + { + PcgLvAux* lv = 
pcg_top_lv_aux(p); + if (pcg_emit_enabled(p)) { + if (lv && lv->scale != 0) + cfree_cg_dup2(p->cg); + else + cfree_cg_dup(p->cg); + } + pcg_dup_type(p); + } cg_load(p->cg); parse_assign_expr(p); to_rvalue(p); diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c @@ -159,23 +159,40 @@ static int try_init_aggregate_from_expr(Parser* p, FrameSlot slot, * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. */ void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, const Type* elem_ty) { - const Type* elem_ptr_ty = type_ptr(p->pool, elem_ty); cg_push_local_typed(p->cg, slot, arr_ty); + /* Fold the byte offset onto the local lvalue's aux; the next + * load/store/addr will bake it into the memop's ea.offset. The result is an + * lvalue of elem_ty backed by the frame slot. */ + pcg_lv_member(p, (i64)offset, elem_ty, /*bf_off=*/0, /*bf_w=*/0, /*ss=*/0); +} + +static void zero_object_bytes_at(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty) { + CfreeCgMemAccess access = pcg_mem(p, ty); + push_subobject_lv(p, slot, arr_ty, offset, ty); + pcg_addr(p); if (pcg_emit_enabled(p)) { - cfree_cg_addr_offset(p->cg, (i64)offset, pcg_tid(p, elem_ptr_ty)); + cfree_cg_memset(p->cg, 0, c_abi_sizeof(p->abi, ty), access); } - pcg_retag_top(p, elem_ptr_ty); - cg_deref(p->cg, elem_ty); + pcg_drop_type(p); } static void push_record_field_lv(Parser* p, FrameSlot slot, const Type* arr_ty, u32 rec_offset, const Type* rec_ty, u32 field_index) { const Field* f = &rec_ty->rec.fields[field_index]; + const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, rec_ty); + u32 foff = L->fields[field_index].offset; + u16 bf_off = 0; + u16 bf_w = 0; + u32 bf_ss = 0; push_subobject_lv(p, slot, arr_ty, rec_offset, rec_ty); - if (pcg_emit_enabled(p)) cfree_cg_field(p->cg, field_index); - pcg_retag_top(p, f->type); - if (f->flags & FIELD_BITFIELD) pcg_set_top_bitfield(p); + if (f->flags & FIELD_BITFIELD) { + bf_off = 
L->fields[field_index].bit_offset; + bf_w = L->fields[field_index].bit_width; + bf_ss = L->fields[field_index].storage_size; + } + pcg_lv_member(p, (i64)foff, f->type, bf_off, bf_w, bf_ss); } /* Emit a load+store for one scalar leaf. */ @@ -183,15 +200,14 @@ static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty, u32 dst_off, FrameSlot src_ptr_slot, const Type* src_ptr_ty, u32 src_off, const Type* leaf_ty) { - const Type* leaf_ptr_ty = type_ptr(p->pool, leaf_ty); push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty); cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty); cg_load(p->cg); - if (pcg_emit_enabled(p)) { - cfree_cg_addr_offset(p->cg, (i64)src_off, pcg_tid(p, leaf_ptr_ty)); - } - pcg_retag_top(p, leaf_ptr_ty); - cg_deref(p->cg, leaf_ty); + /* TOS is now a pointer rvalue (the loaded source pointer). Retag as a + * C-language lvalue with POINTER_RV base, then fold the source byte offset + * onto its aux. The next cg_load consumes the EA into the memop. */ + pcg_deref(p, leaf_ty); + pcg_lv_member(p, (i64)src_off, leaf_ty, 0, 0, 0); cg_load(p->cg); cg_store(p->cg); cg_drop(p->cg); @@ -679,6 +695,7 @@ void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, return; } advance(p); /* '{' */ + zero_object_bytes_at(p, slot, arr_ty, offset, ty); init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1); expect_punct(p, '}', "'}' after struct initializer"); return; diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h @@ -197,6 +197,7 @@ typedef struct Parser { const Type** cg_type_stack; u8* cg_value_flags; + PcgLvAux* cg_lv_aux; u32 cg_type_sp; u32 cg_type_cap; diff --git a/lang/toy/asm.c b/lang/toy/asm.c @@ -473,15 +473,16 @@ int toy_parse_typed_asm_tail(ToyParser* p, CfreeCgTypeId result_ty, while (i > 0) { CfreeCgField field; uint32_t field_index; + uint64_t foff = 0; --i; field_index = record_field_indexes ? 
record_field_indexes[i] : i; if (cfree_cg_type_record_field(p->c, result_ty, field_index, &field, - NULL) != 0) + &foff) != 0) goto done; cfree_cg_push_local(p->cg, rec_slot); - cfree_cg_field(p->cg, field_index); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, field.type)); + cfree_cg_store(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)foff, 0}); } cfree_cg_push_local(p->cg, rec_slot); } diff --git a/lang/toy/builtins.c b/lang/toy/builtins.c @@ -31,20 +31,20 @@ static void toy_store_top_to_local(ToyParser* p, CfreeCgLocal local, CfreeCgTypeId ty) { cfree_cg_push_local(p->cg, local); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, ty)); + cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); } static void toy_store_const_to_local(ToyParser* p, CfreeCgLocal local, CfreeCgTypeId ty, uint64_t value) { cfree_cg_push_local(p->cg, local); cfree_cg_push_int(p->cg, value, ty); - cfree_cg_store(p->cg, toy_mem_access(p, ty)); + cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); } static void toy_push_loaded_local(ToyParser* p, CfreeCgLocal local, CfreeCgTypeId ty) { cfree_cg_push_local(p->cg, local); - cfree_cg_load(p->cg, toy_mem_access(p, ty)); + cfree_cg_load(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); } static void toy_emit_dynamic_memory_loop(ToyParser* p, CfreeCgLocal dst_local, @@ -68,22 +68,24 @@ static void toy_emit_dynamic_memory_loop(ToyParser* p, CfreeCgLocal dst_local, toy_push_loaded_local(p, dst_local, u8_ptr_ty); toy_push_loaded_local(p, index_local, p->int_type); - cfree_cg_index(p->cg, 0); + /* The destination memop carries scale = sizeof(u8) = 1; it pops + * [dst_ptr, index] and then [value]. 
*/ if (is_memset) { cfree_cg_push_int(p->cg, set_value, u8_ty); } else { toy_push_loaded_local(p, src_local, u8_ptr_ty); toy_push_loaded_local(p, index_local, p->int_type); - cfree_cg_index(p->cg, 0); - cfree_cg_load(p->cg, toy_mem_access(p, u8_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, u8_ty), + (CfreeCgEffAddr){0, 1}); } - cfree_cg_store(p->cg, toy_mem_access(p, u8_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, u8_ty), + (CfreeCgEffAddr){0, 1}); cfree_cg_push_local(p->cg, index_local); toy_push_loaded_local(p, index_local, p->int_type); cfree_cg_push_int(p->cg, 1, p->int_type); cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, 0); - cfree_cg_store(p->cg, toy_mem_access(p, p->int_type)); + cfree_cg_store(p->cg, toy_mem_access(p, p->int_type), (CfreeCgEffAddr){0, 0}); cfree_cg_jump(p->cg, loop_label); cfree_cg_label_place(p->cg, end_label); @@ -552,17 +554,17 @@ CfreeCgTypeId toy_parse_builtin_call(ToyParser* p, CfreeSym name, dst_slot = cfree_cg_local(p->cg, dst_ty, toy_slot_attrs(0)); cfree_cg_push_local(p->cg, src_slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, src_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, src_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(p->cg, dst_slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, dst_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, dst_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(p->cg, dst_slot); - cfree_cg_load(p->cg, toy_mem_access(p, dst_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, dst_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(p->cg, clear_mask, dst_ty); cfree_cg_int_binop(p->cg, CFREE_CG_INT_AND, 0); cfree_cg_push_local(p->cg, src_slot); - cfree_cg_load(p->cg, toy_mem_access(p, src_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, src_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(p->cg, src_mask, src_ty); cfree_cg_int_binop(p->cg, CFREE_CG_INT_AND, 0); if (lo > 0) { @@ -589,28 +591,11 @@ CfreeCgTypeId toy_parse_builtin_call(ToyParser* p, CfreeSym name, 
toy_error(p, p->cur.loc, "fma expects matching float operands"); return CFREE_CG_TYPE_NONE; } - { - CfreeCgLocal c_slot = cfree_cg_local(p->cg, a, toy_slot_attrs(0)); - CfreeCgLocal b_slot = cfree_cg_local(p->cg, a, toy_slot_attrs(0)); - CfreeCgLocal a_slot = cfree_cg_local(p->cg, a, toy_slot_attrs(0)); - cfree_cg_push_local(p->cg, c_slot); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, a)); - cfree_cg_push_local(p->cg, b_slot); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, a)); - cfree_cg_push_local(p->cg, a_slot); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, a)); - cfree_cg_push_local(p->cg, a_slot); - cfree_cg_load(p->cg, toy_mem_access(p, a)); - cfree_cg_push_local(p->cg, b_slot); - cfree_cg_load(p->cg, toy_mem_access(p, a)); - cfree_cg_fp_binop(p->cg, CFREE_CG_FP_MUL, 0); - cfree_cg_push_local(p->cg, c_slot); - cfree_cg_load(p->cg, toy_mem_access(p, a)); - cfree_cg_fp_binop(p->cg, CFREE_CG_FP_ADD, 0); - } + cfree_cg_rot3(p->cg); /* [b, c, a] */ + cfree_cg_rot3(p->cg); /* [c, a, b] */ + cfree_cg_fp_binop(p->cg, CFREE_CG_FP_MUL, 0); + cfree_cg_swap(p->cg); + cfree_cg_fp_binop(p->cg, CFREE_CG_FP_ADD, 0); return a; } @@ -980,14 +965,19 @@ CfreeCgTypeId toy_parse_generic_builtin(ToyParser* p, CfreeSym name, fields[1].type = toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL); rec_ty = cfree_cg_type_record(p->c, 0, fields, 2); rec_slot = cfree_cg_local(p->cg, rec_ty, toy_slot_attrs(0)); - cfree_cg_push_local(p->cg, rec_slot); - cfree_cg_field(p->cg, 1); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type)); - cfree_cg_push_local(p->cg, rec_slot); - cfree_cg_field(p->cg, 0); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type)); + { + uint64_t f0_off = 0, f1_off = 0; + cfree_cg_type_record_field(p->c, rec_ty, 0, NULL, &f0_off); + cfree_cg_type_record_field(p->c, rec_ty, 1, NULL, &f1_off); + cfree_cg_push_local(p->cg, rec_slot); + cfree_cg_swap(p->cg); + 
cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type), + (CfreeCgEffAddr){(int64_t)f1_off, 0}); + cfree_cg_push_local(p->cg, rec_slot); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type), + (CfreeCgEffAddr){(int64_t)f0_off, 0}); + } cfree_cg_push_local(p->cg, rec_slot); return rec_ty; } @@ -1295,14 +1285,19 @@ CfreeCgTypeId toy_parse_atomic_generic_builtin(ToyParser* p, CfreeSym name, fields[1].type = toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL); rec_ty = cfree_cg_type_record(p->c, 0, fields, 2); rec_slot = cfree_cg_local(p->cg, rec_ty, toy_slot_attrs(0)); - cfree_cg_push_local(p->cg, rec_slot); - cfree_cg_field(p->cg, 1); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type)); - cfree_cg_push_local(p->cg, rec_slot); - cfree_cg_field(p->cg, 0); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type)); + { + uint64_t f0_off = 0, f1_off = 0; + cfree_cg_type_record_field(p->c, rec_ty, 0, NULL, &f0_off); + cfree_cg_type_record_field(p->c, rec_ty, 1, NULL, &f1_off); + cfree_cg_push_local(p->cg, rec_slot); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, toy_mem_access(p, fields[1].type), + (CfreeCgEffAddr){(int64_t)f1_off, 0}); + cfree_cg_push_local(p->cg, rec_slot); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, toy_mem_access(p, fields[0].type), + (CfreeCgEffAddr){(int64_t)f0_off, 0}); + } cfree_cg_push_local(p->cg, rec_slot); return rec_ty; } diff --git a/lang/toy/expr.c b/lang/toy/expr.c @@ -42,16 +42,16 @@ CfreeCgTypeId toy_push_named_rvalue(ToyParser* p, CfreeSym name) { if (v) { toy_push_var_lvalue(p, v); if (cfree_cg_type_kind(p->c, v->type) != CFREE_CG_TYPE_RECORD) - cfree_cg_load(p->cg, toy_mem_access(p, v->type)); + cfree_cg_load(p->cg, toy_mem_access(p, v->type), (CfreeCgEffAddr){0, 0}); p->last_type = v->toy_type; return v->type; } { ToyGlobal* g = toy_find_global(p, name); if (g) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + cfree_cg_push_symbol_addr(p->cg, g->sym, 
0); if (cfree_cg_type_kind(p->c, g->type) != CFREE_CG_TYPE_RECORD) - cfree_cg_load(p->cg, toy_mem_access(p, g->type)); + cfree_cg_load(p->cg, toy_mem_access(p, g->type), (CfreeCgEffAddr){0, 0}); p->last_type = g->toy_type; return g->type; } @@ -458,7 +458,7 @@ CfreeCgTypeId toy_emit_var_lvalue(ToyParser* p, CfreeSym name) { { ToyGlobal* g = toy_find_global(p, name); if (g) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + cfree_cg_push_symbol_addr(p->cg, g->sym, 0); return g->type; } } @@ -474,9 +474,11 @@ static void toy_store_tos_to_local(ToyParser* p, CfreeCgLocal local, CfreeCgTypeId ty) { cfree_cg_push_local(p->cg, local); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, ty)); + cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); } +/* Consumes [slice_base, idx] where slice_base is a pointer to the slice + * record. Produces [elem_ptr] -- a pointer-rvalue to the element. */ CfreeCgTypeId toy_emit_slice_index_lvalue(ToyParser* p, CfreeCgTypeId slice_ty, ToyTypeId slice_toy_type, ToyTypeId* elem_toy_out) { @@ -484,19 +486,25 @@ CfreeCgTypeId toy_emit_slice_index_lvalue(ToyParser* p, CfreeCgTypeId slice_ty, CfreeCgTypeId elem_ty = toy_type_id_cg_or_none(p, elem_toy); CfreeCgField ptr_field; CfreeCgLocal idx_slot; + uint64_t ptr_field_off = 0; if (elem_ty == CFREE_CG_TYPE_NONE || cfree_cg_type_kind(p->c, slice_ty) != CFREE_CG_TYPE_RECORD || - cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, NULL) != 0) { + cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, + &ptr_field_off) != 0) { toy_error(p, p->cur.loc, "cannot index non-array/non-pointer"); return CFREE_CG_TYPE_NONE; } + /* Stash the index, then load the slice's ptr field, then re-push idx + * and compute element pointer. 
*/ idx_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); toy_store_tos_to_local(p, idx_slot, p->int_type); - cfree_cg_field(p->cg, 0); - cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type)); + cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type), + (CfreeCgEffAddr){(int64_t)ptr_field_off, 0}); cfree_cg_push_local(p->cg, idx_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_index(p->cg, 0); + cfree_cg_load(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty), + cfree_cg_type_ptr(p->c, elem_ty, 0)); if (elem_toy_out) *elem_toy_out = elem_toy; return elem_ty; } @@ -536,39 +544,59 @@ CfreeCgTypeId toy_emit_slice_value(ToyParser* p, CfreeCgTypeId base_ty, slice_toy = toy_type_register_slice(p, elem_ty, elem_toy); slice_ty = toy_type_cg(p, slice_toy); - if (slice_ty == CFREE_CG_TYPE_NONE || - cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, NULL) != 0) { - toy_error(p, p->cur.loc, "failed to create slice type"); - return CFREE_CG_TYPE_NONE; - } + { + uint64_t ptr_off = 0; + uint64_t len_off = 0; + CfreeCgField len_field; + (void)len_field; + if (slice_ty == CFREE_CG_TYPE_NONE || + cfree_cg_type_record_field(p->c, slice_ty, 0, &ptr_field, &ptr_off) != 0 || + cfree_cg_type_record_field(p->c, slice_ty, 1, &len_field, &len_off) != 0) { + toy_error(p, p->cur.loc, "failed to create slice type"); + return CFREE_CG_TYPE_NONE; + } - end_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); - toy_store_tos_to_local(p, end_slot, p->int_type); - start_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); - toy_store_tos_to_local(p, start_slot, p->int_type); + end_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); + toy_store_tos_to_local(p, end_slot, p->int_type); + start_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); + toy_store_tos_to_local(p, start_slot, p->int_type); + + result_slot = cfree_cg_local(p->cg, slice_ty, 
toy_slot_attrs(0)); + /* TOS = [base] (slice lvalue/pointer or array lvalue/pointer). */ + if (toy_type_is_slice(p, base_toy_type)) { + /* Replace slice base with its data pointer (a pointer-rvalue). */ + cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type), + (CfreeCgEffAddr){(int64_t)ptr_off, 0}); + } else { + /* Array base: TOS is a pointer-rvalue (callers always project + * to a pointer for array/slice bases now). Bitcast to *elem. */ + cfree_cg_bitcast(p->cg, cfree_cg_type_ptr(p->c, elem_ty, 0)); + } + /* Compute (base + start * sizeof(elem)) as a pointer to the slice's + * first element. */ + cfree_cg_push_local(p->cg, start_slot); + cfree_cg_load(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty), + cfree_cg_type_ptr(p->c, elem_ty, 0)); + /* Store the data pointer into result_slot.ptr. */ + cfree_cg_push_local(p->cg, result_slot); + cfree_cg_swap(p->cg); + cfree_cg_store(p->cg, toy_mem_access(p, ptr_field.type), + (CfreeCgEffAddr){(int64_t)ptr_off, 0}); - result_slot = cfree_cg_local(p->cg, slice_ty, toy_slot_attrs(0)); - if (toy_type_is_slice(p, base_toy_type)) { - cfree_cg_field(p->cg, 0); - cfree_cg_load(p->cg, toy_mem_access(p, ptr_field.type)); + /* len = end - start; store into result_slot.len. 
*/ + cfree_cg_push_local(p->cg, result_slot); + cfree_cg_push_local(p->cg, end_slot); + cfree_cg_load(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + cfree_cg_push_local(p->cg, start_slot); + cfree_cg_load(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_SUB, 0); + cfree_cg_store(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){(int64_t)len_off, 0}); } - cfree_cg_push_local(p->cg, start_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_index(p->cg, 0); - cfree_cg_addr(p->cg); - cfree_cg_push_local(p->cg, result_slot); - cfree_cg_field(p->cg, 0); - cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, ptr_field.type)); - - cfree_cg_push_local(p->cg, result_slot); - cfree_cg_field(p->cg, 1); - cfree_cg_push_local(p->cg, end_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_push_local(p->cg, start_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_int_binop(p->cg, CFREE_CG_INT_SUB, 0); - cfree_cg_store(p->cg, toy_mem_access(p, p->int_type)); cfree_cg_push_local(p->cg, result_slot); if (slice_toy_out) *slice_toy_out = slice_toy; @@ -729,7 +757,12 @@ static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) { ToyVar* v = toy_find_var(p, name); if (v && (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY || cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD)) { - toy_push_var_lvalue(p, v); + if (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY) { + /* Array slicing/indexing wants a pointer root for address math. 
*/ + toy_push_var_addr(p, v); + } else { + toy_push_var_lvalue(p, v); + } p->last_type = v->toy_type; return v->type; } @@ -738,7 +771,7 @@ static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) { if (g && (cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_ARRAY || cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD)) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + cfree_cg_push_symbol_addr(p->cg, g->sym, 0); p->last_type = g->toy_type; return g->type; } @@ -755,7 +788,7 @@ static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) { { ToyGlobal* g = toy_find_global(p, name); if (g && toy_type_is_slice(p, g->toy_type)) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + cfree_cg_push_symbol_addr(p->cg, g->sym, 0); p->last_type = g->toy_type; return g->type; } @@ -798,8 +831,8 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { } ty = cfree_cg_type_ptr_pointee(p->c, ty); toy_ty = toy_type_pointee(p, toy_ty); - cfree_cg_indirect(p->cg); - cfree_cg_load(p->cg, toy_mem_access(p, ty)); + /* TOS is a pointer-rvalue; memop accepts it as base directly. */ + cfree_cg_load(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); p->last_type = toy_ty != TOY_TYPE_NONE ? toy_ty : toy_type_from_cg(p, ty); continue; } @@ -834,15 +867,13 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { CfreeCgTypeId pointee = cfree_cg_type_ptr_pointee(p->c, ty); ToyTypeId source_pointee = toy_type_pointee(p, toy_ty); if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) { - CfreeCgLocal idx_slot = - cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); - cfree_cg_push_local(p->cg, idx_slot); + /* TOS = [ptr-to-array, idx]. Cast pointer to *elem so the load + * can apply the array's elem-scale on the next memop. 
*/ + CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, pointee); + cfree_cg_swap(p->cg); + cfree_cg_bitcast(p->cg, cfree_cg_type_ptr(p->c, elem_ty, 0)); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_indirect(p->cg); - cfree_cg_push_local(p->cg, idx_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - ty = cfree_cg_type_array_elem(p->c, pointee); + ty = elem_ty; toy_ty = toy_type_array_elem(p, source_pointee); } else { ty = pointee; @@ -854,15 +885,16 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { } else if (toy_type_is_slice(p, toy_ty)) { ty = toy_emit_slice_index_lvalue(p, ty, toy_ty, &toy_ty); if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; - cfree_cg_load(p->cg, toy_mem_access(p, ty)); + cfree_cg_load(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); p->last_type = toy_ty != TOY_TYPE_NONE ? toy_ty : toy_type_from_cg(p, ty); continue; } else { toy_error(p, p->cur.loc, "cannot index non-array/non-pointer"); return CFREE_CG_TYPE_NONE; } - cfree_cg_index(p->cg, 0); - cfree_cg_load(p->cg, toy_mem_access(p, ty)); + /* TOS = [base, idx]; load with element-size scale. */ + cfree_cg_load(p->cg, toy_mem_access(p, ty), + (CfreeCgEffAddr){0, (uint32_t)cfree_cg_type_size(p->c, ty)}); p->last_type = toy_ty != TOY_TYPE_NONE ? toy_ty : toy_type_from_cg(p, ty); continue; } @@ -872,14 +904,16 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { uint32_t i, nfields; int found = 0; CfreeCgField found_field; + uint64_t found_off = 0; ToyNamedType* named; ToyTypeId field_toy_type = TOY_TYPE_NONE; if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_PTR && cfree_cg_type_kind(p->c, cfree_cg_type_ptr_pointee(p->c, ty)) == CFREE_CG_TYPE_RECORD) { + /* TOS is a pointer to the record; the memop accepts it directly, + * so no intermediate op is needed -- we just update `ty`. 
*/ ty = cfree_cg_type_ptr_pointee(p->c, ty); toy_ty = toy_type_pointee(p, toy_ty); - cfree_cg_indirect(p->cg); } named = toy_find_named_type_by_type(p, ty); if (p->cur.kind == TOK_NUMBER && !p->cur.is_float) { @@ -894,14 +928,14 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { field_index = (uint32_t)p->cur.int_value; toy_parser_advance(p); if (cfree_cg_type_record_field(p->c, ty, field_index, &found_field, - NULL) != 0) { + &found_off) != 0) { return CFREE_CG_TYPE_NONE; } - cfree_cg_field(p->cg, field_index); if (named && field_index < named->nfields) field_toy_type = named->fields[field_index].toy_type; ty = found_field.type; - cfree_cg_load(p->cg, toy_mem_access(p, ty)); + cfree_cg_load(p->cg, toy_mem_access(p, ty), + (CfreeCgEffAddr){(int64_t)found_off, 0}); if (field_toy_type != TOY_TYPE_NONE) { CfreeCgTypeId resolved = toy_type_resolved_cg(p, field_toy_type); p->last_type = field_toy_type; @@ -930,13 +964,14 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { memset(&found_field, 0, sizeof found_field); for (i = 0; i < nfields; ++i) { CfreeCgField field; - if (cfree_cg_type_record_field(p->c, ty, i, &field, NULL) == 0 && + uint64_t off = 0; + if (cfree_cg_type_record_field(p->c, ty, i, &field, &off) == 0 && field.name == field_name) { found = 1; found_field = field; + found_off = off; if (named && i < named->nfields) field_toy_type = named->fields[i].toy_type; - cfree_cg_field(p->cg, i); break; } } @@ -945,7 +980,8 @@ static CfreeCgTypeId toy_parse_expr_postfix(ToyParser* p) { return CFREE_CG_TYPE_NONE; } ty = found_field.type; - cfree_cg_load(p->cg, toy_mem_access(p, ty)); + cfree_cg_load(p->cg, toy_mem_access(p, ty), + (CfreeCgEffAddr){(int64_t)found_off, 0}); if (field_toy_type != TOY_TYPE_NONE) { CfreeCgTypeId resolved = toy_type_resolved_cg(p, field_toy_type); p->last_type = field_toy_type; @@ -1029,44 +1065,24 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { if (p->cur.kind == TOK_LBRACKET || p->cur.kind == 
TOK_DOTSTAR || p->cur.kind == TOK_DOT) { + /* `&expr` chain. Maintain the invariant that TOS holds a + * pointer-rvalue of type *ty. The chain returns a pointer-rvalue + * directly (no trailing cfree_cg_addr needed). */ ToyTypeId ty_toy = TOY_TYPE_NONE; - if (p->cur.kind == TOK_DOT) { - ToyVar* v = toy_find_var(p, name); - ToyGlobal* g = toy_find_global(p, name); - if (v && cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD) { - toy_push_var_lvalue(p, v); - ty = v->type; - ty_toy = v->toy_type; - } else if (g && - cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); - ty = g->type; - ty_toy = g->toy_type; - } else { - ty = toy_push_named_rvalue(p, name); - ty_toy = p->last_type; - } - } else if (p->cur.kind == TOK_LBRACKET) { + { ToyVar* v = toy_find_var(p, name); ToyGlobal* g = toy_find_global(p, name); - if (v && (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY || - cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD)) { - toy_push_var_lvalue(p, v); + if (v) { + toy_push_var_addr(p, v); ty = v->type; ty_toy = v->toy_type; - } else if (g && - (cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_ARRAY || - cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD)) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + } else if (g) { + cfree_cg_push_symbol_addr(p->cg, g->sym, 0); ty = g->type; ty_toy = g->toy_type; } else { - ty = toy_push_named_rvalue(p, name); - ty_toy = p->last_type; + ty = CFREE_CG_TYPE_NONE; } - } else { - ty = toy_push_named_rvalue(p, name); - ty_toy = p->last_type; } if (ty == CFREE_CG_TYPE_NONE) { toy_error(p, p->cur.loc, "undefined variable"); @@ -1086,24 +1102,38 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { } if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_PTR) { CfreeCgTypeId pointee = cfree_cg_type_ptr_pointee(p->c, ty); - if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) { + /* TOS holds **T_chain; load the inner pointer first. 
*/ + { CfreeCgLocal idx_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); cfree_cg_push_local(p->cg, idx_slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_indirect(p->cg); + cfree_cg_store(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + cfree_cg_load(p->cg, toy_mem_access(p, ty), + (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(p->cg, idx_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - ty = cfree_cg_type_array_elem(p->c, pointee); + cfree_cg_load(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + } + if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) { + CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, pointee); + ty = elem_ty; ty_toy = toy_type_array_elem(p, toy_type_pointee(p, ty_toy)); + toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty), + cfree_cg_type_ptr(p->c, elem_ty, 0)); } else { ty = pointee; ty_toy = toy_type_pointee(p, ty_toy); + toy_addr_index(p, cfree_cg_type_size(p->c, ty), + cfree_cg_type_ptr(p->c, ty, 0)); } } else if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_ARRAY) { - ty = cfree_cg_type_array_elem(p->c, ty); + CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, ty); + ty = elem_ty; ty_toy = toy_type_array_elem(p, ty_toy); + toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty), + cfree_cg_type_ptr(p->c, elem_ty, 0)); } else if (toy_type_is_slice(p, ty_toy)) { ToyTypeId elem_toy = TOY_TYPE_NONE; ty = toy_emit_slice_index_lvalue(p, ty, ty_toy, &elem_toy); @@ -1113,7 +1143,6 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { toy_error(p, p->cur.loc, "cannot index non-array/non-pointer"); return CFREE_CG_TYPE_NONE; } - cfree_cg_index(p->cg, 0); continue; } if (toy_parser_match(p, TOK_DOTSTAR)) { @@ -1121,22 +1150,27 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { toy_error(p, p->cur.loc, "cannot dereference non-pointer"); return CFREE_CG_TYPE_NONE; } + /* TOS = **T; load to TOS = *T. 
*/ + cfree_cg_load(p->cg, toy_mem_access(p, ty), + (CfreeCgEffAddr){0, 0}); ty = cfree_cg_type_ptr_pointee(p->c, ty); ty_toy = toy_type_pointee(p, ty_toy); - cfree_cg_indirect(p->cg); continue; } if (toy_parser_match(p, TOK_DOT)) { CfreeCgField field; uint32_t field_index = 0; + uint64_t foff = 0; ToyNamedType* named; if (cfree_cg_type_kind(p->c, ty) == CFREE_CG_TYPE_PTR && cfree_cg_type_kind(p->c, cfree_cg_type_ptr_pointee(p->c, ty)) == CFREE_CG_TYPE_RECORD) { + /* TOS = **Rec; load to *Rec. */ + cfree_cg_load(p->cg, toy_mem_access(p, ty), + (CfreeCgEffAddr){0, 0}); ty = cfree_cg_type_ptr_pointee(p->c, ty); ty_toy = toy_type_pointee(p, ty_toy); - cfree_cg_indirect(p->cg); } if (cfree_cg_type_kind(p->c, ty) != CFREE_CG_TYPE_RECORD) { toy_error(p, p->cur.loc, "field access on non-record"); @@ -1153,7 +1187,7 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { field_index = (uint32_t)p->cur.int_value; toy_parser_advance(p); if (cfree_cg_type_record_field(p->c, ty, field_index, &field, - NULL) != 0) + &foff) != 0) return CFREE_CG_TYPE_NONE; } else { CfreeSym field_name; @@ -1168,17 +1202,21 @@ static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { toy_error(p, p->cur.loc, "unknown record field"); return CFREE_CG_TYPE_NONE; } + if (cfree_cg_type_record_field(p->c, ty, field_index, NULL, + &foff) != 0) + return CFREE_CG_TYPE_NONE; } - cfree_cg_field(p->cg, field_index); ty = field.type; ty_toy = (named && field_index < named->nfields) ? named->fields[field_index].toy_type : toy_type_from_cg(p, ty); + toy_addr_offset(p, (int64_t)foff, + cfree_cg_type_ptr(p->c, ty, 0)); continue; } break; } - cfree_cg_addr(p->cg); + /* TOS already holds a pointer-rvalue of type *ty. 
*/ { CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(p->c, ty, 0); p->last_type = toy_type_register_ptr( @@ -1505,15 +1543,15 @@ static CfreeCgTypeId toy_parse_expr_and(ToyParser* p) { cfree_cg_branch_false(p->cg, false_label); cfree_cg_push_local(p->cg, result_slot); cfree_cg_push_int(p->cg, 1, bool_ty); - cfree_cg_store(p->cg, toy_mem_access(p, bool_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_jump(p->cg, end_label); cfree_cg_label_place(p->cg, false_label); cfree_cg_push_local(p->cg, result_slot); cfree_cg_push_int(p->cg, 0, bool_ty); - cfree_cg_store(p->cg, toy_mem_access(p, bool_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_label_place(p->cg, end_label); cfree_cg_push_local(p->cg, result_slot); - cfree_cg_load(p->cg, toy_mem_access(p, bool_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0}); ty = bool_ty; toy_note_cg_result_type(p, ty); } @@ -1544,15 +1582,15 @@ static CfreeCgTypeId toy_parse_expr_or(ToyParser* p) { cfree_cg_branch_true(p->cg, true_label); cfree_cg_push_local(p->cg, result_slot); cfree_cg_push_int(p->cg, 0, bool_ty); - cfree_cg_store(p->cg, toy_mem_access(p, bool_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_jump(p->cg, end_label); cfree_cg_label_place(p->cg, true_label); cfree_cg_push_local(p->cg, result_slot); cfree_cg_push_int(p->cg, 1, bool_ty); - cfree_cg_store(p->cg, toy_mem_access(p, bool_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0}); cfree_cg_label_place(p->cg, end_label); cfree_cg_push_local(p->cg, result_slot); - cfree_cg_load(p->cg, toy_mem_access(p, bool_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, bool_ty), (CfreeCgEffAddr){0, 0}); ty = bool_ty; toy_note_cg_result_type(p, ty); } diff --git a/lang/toy/internal.h b/lang/toy/internal.h @@ -403,6 +403,11 @@ ToyScope* toy_find_scope(ToyParser* p, CfreeSym name); ToyScope* 
toy_find_innermost_loop_scope(ToyParser* p); void toy_push_var_lvalue(ToyParser* p, const ToyVar* v); void toy_push_var_addr(ToyParser* p, const ToyVar* v); +/* Chain helpers: see comments in symbols.c. Both consume and produce + * pointer-rvalue TOS. */ +void toy_addr_offset(ToyParser* p, int64_t offset, CfreeCgTypeId result_ptr_ty); +void toy_addr_index(ToyParser* p, uint64_t elem_size, + CfreeCgTypeId result_ptr_ty); CfreeCgSym toy_find_decl_sym(ToyParser* p, CfreeSym name); int toy_parse_program(ToyParser* p); diff --git a/lang/toy/parser.c b/lang/toy/parser.c @@ -33,11 +33,12 @@ int toy_parse_block(ToyParser* p) { return 1; } -static void toy_push_local_index(ToyParser* p, CfreeCgLocal slot, - uint64_t index) { +/* Pushes [base, index] onto the value stack as the EA-shaped pair consumed + * by an indexed memop. Callers supply the element size in the store's EA. */ +static void toy_push_local_indexed(ToyParser* p, CfreeCgLocal slot, + uint64_t index) { cfree_cg_push_local(p->cg, slot); cfree_cg_push_int(p->cg, index, p->int_type); - cfree_cg_index(p->cg, 0); } static int toy_check_source_value(ToyParser* p, CfreeCgTypeId expected_cg, @@ -97,14 +98,15 @@ static int toy_copy_record_lvalue_to_local(ToyParser* p, CfreeCgTypeId src_ty, nfields = cfree_cg_type_record_nfields(p->c, dst_ty); for (i = 0; i < nfields; ++i) { CfreeCgField field; - if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, NULL)) return 0; + uint64_t offset = 0; + if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, &offset)) return 0; cfree_cg_dup(p->cg); - cfree_cg_field(p->cg, i); - cfree_cg_load(p->cg, toy_mem_access(p, field.type)); + cfree_cg_load(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)offset, 0}); cfree_cg_push_local(p->cg, dst_slot); - cfree_cg_field(p->cg, i); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, field.type)); + cfree_cg_store(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)offset, 0}); } cfree_cg_drop(p->cg); 
return 1; @@ -123,18 +125,19 @@ static int toy_copy_record_lvalue_to_var(ToyParser* p, CfreeCgTypeId src_ty, nfields = cfree_cg_type_record_nfields(p->c, dst_ty); for (i = 0; i < nfields; ++i) { CfreeCgField field; - if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, NULL)) return 0; + uint64_t offset = 0; + if (cfree_cg_type_record_field(p->c, dst_ty, i, &field, &offset)) return 0; cfree_cg_dup(p->cg); - cfree_cg_field(p->cg, i); - cfree_cg_load(p->cg, toy_mem_access(p, field.type)); + cfree_cg_load(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)offset, 0}); if (dst_var) { toy_push_var_lvalue(p, dst_var); } else { - cfree_cg_push_symbol_lvalue(p->cg, dst_global->sym, 0); + cfree_cg_push_symbol_addr(p->cg, dst_global->sym, 0); } - cfree_cg_field(p->cg, i); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, field.type)); + cfree_cg_store(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)offset, 0}); } cfree_cg_drop(p->cg); return 1; @@ -158,7 +161,7 @@ static int toy_parse_array_initializer(ToyParser* p, CfreeCgLocal slot, toy_error(p, p->cur.loc, "too many array elements"); return 0; } - toy_push_local_index(p, slot, index); + toy_push_local_indexed(p, slot, index); expr_ty = toy_parse_expr(p); if (expr_ty == CFREE_CG_TYPE_NONE) return 0; if (!toy_check_source_value(p, elem_ty, elem_toy_type, expr_ty, @@ -166,7 +169,8 @@ static int toy_parse_array_initializer(ToyParser* p, CfreeCgLocal slot, return 0; } if (expr_ty != elem_ty) cfree_cg_bitcast(p->cg, elem_ty); - cfree_cg_store(p->cg, toy_mem_access(p, elem_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, elem_ty), + (CfreeCgEffAddr){0, (uint32_t)cfree_cg_type_size(p->c, elem_ty)}); index++; if (!toy_parser_match(p, TOK_COMMA)) break; } @@ -175,9 +179,10 @@ static int toy_parse_array_initializer(ToyParser* p, CfreeCgLocal slot, return 0; } while (index < count) { - toy_push_local_index(p, slot, index); + toy_push_local_indexed(p, slot, index); 
cfree_cg_push_int(p->cg, 0, elem_ty); - cfree_cg_store(p->cg, toy_mem_access(p, elem_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, elem_ty), + (CfreeCgEffAddr){0, (uint32_t)cfree_cg_type_size(p->c, elem_ty)}); index++; } return 1; @@ -198,12 +203,13 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot, for (i = 0; i < nfields; ++i) { CfreeCgField field; - if (cfree_cg_type_record_field(p->c, record_ty, i, &field, NULL) != 0) + uint64_t foff = 0; + if (cfree_cg_type_record_field(p->c, record_ty, i, &field, &foff) != 0) return 0; cfree_cg_push_local(p->cg, slot); - cfree_cg_field(p->cg, i); cfree_cg_push_int(p->cg, 0, field.type); - cfree_cg_store(p->cg, toy_mem_access(p, field.type)); + cfree_cg_store(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)foff, 0}); } if (positional) { @@ -211,15 +217,15 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot, while (p->cur.kind != TOK_RBRACE && p->cur.kind != TOK_EOF) { CfreeCgField field; CfreeCgTypeId expr_ty; + uint64_t foff = 0; if (field_index >= nfields) { toy_error(p, p->cur.loc, "too many tuple fields"); return 0; } if (cfree_cg_type_record_field(p->c, record_ty, field_index, &field, - NULL) != 0) + &foff) != 0) return 0; cfree_cg_push_local(p->cg, slot); - cfree_cg_field(p->cg, field_index); expr_ty = toy_parse_expr(p); if (expr_ty == CFREE_CG_TYPE_NONE) return 0; { @@ -234,7 +240,8 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot, } } if (expr_ty != field.type) cfree_cg_bitcast(p->cg, field.type); - cfree_cg_store(p->cg, toy_mem_access(p, field.type)); + cfree_cg_store(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)foff, 0}); field_index++; if (!toy_parser_match(p, TOK_COMMA)) break; } @@ -250,6 +257,7 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot, CfreeCgField field; uint32_t field_index; CfreeCgTypeId expr_ty; + uint64_t foff = 0; if (p->cur.kind != TOK_IDENT) { toy_error(p, 
p->cur.loc, "expected field name"); return 0; @@ -264,8 +272,9 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot, toy_error(p, p->cur.loc, "unknown record field"); return 0; } + if (cfree_cg_type_record_field(p->c, record_ty, field_index, NULL, &foff) != 0) + return 0; cfree_cg_push_local(p->cg, slot); - cfree_cg_field(p->cg, field_index); expr_ty = toy_parse_expr(p); if (expr_ty == CFREE_CG_TYPE_NONE) return 0; { @@ -280,7 +289,8 @@ static int toy_parse_record_initializer(ToyParser* p, CfreeCgLocal slot, } } if (expr_ty != field.type) cfree_cg_bitcast(p->cg, field.type); - cfree_cg_store(p->cg, toy_mem_access(p, field.type)); + cfree_cg_store(p->cg, toy_mem_access(p, field.type), + (CfreeCgEffAddr){(int64_t)foff, 0}); if (!toy_parser_match(p, TOK_COMMA)) break; } if (!toy_parser_expect(p, TOK_RBRACE)) { @@ -362,7 +372,7 @@ static int toy_parse_value_block_body_to_local(ToyParser* p, CfreeCgLocal slot, if (arm_ty != result_ty) cfree_cg_bitcast(p->cg, result_ty); cfree_cg_push_local(p->cg, slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, result_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, result_ty), (CfreeCgEffAddr){0, 0}); if (!toy_parser_expect(p, TOK_RBRACE)) { toy_error(p, p->cur.loc, "expected '}' after value block"); p->nvars = saved_nvars; @@ -448,7 +458,7 @@ static int toy_parse_switch_initializer(ToyParser* p, CfreeCgLocal slot, selector_slot = cfree_cg_local(p->cg, selector_ty, toy_slot_attrs(0)); cfree_cg_push_local(p->cg, selector_slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, selector_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0}); end_label = cfree_cg_label_new(p->cg); dispatch_label = cfree_cg_label_new(p->cg); /* Skip the arm bodies on entry; come back through the dispatch label. 
*/ @@ -531,7 +541,7 @@ static int toy_parse_switch_initializer(ToyParser* p, CfreeCgLocal slot, } cfree_cg_label_place(p->cg, dispatch_label); cfree_cg_push_local(p->cg, selector_slot); - cfree_cg_load(p->cg, toy_mem_access(p, selector_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0}); if (saw_default) { sw.default_label = default_arm_label; } else { @@ -639,7 +649,7 @@ static int toy_parse_while_initializer_named(ToyParser* p, CfreeCgLocal slot, p->nscopes--; cfree_cg_push_local(p->cg, slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, result_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, result_ty), (CfreeCgEffAddr){0, 0}); return 1; } @@ -828,7 +838,7 @@ static int toy_parse_let_stmt(ToyParser* p) { if (!toy_add_local_typed(p, name, ty, toy_ty, slot, is_var)) return 0; cfree_cg_push_local(p->cg, slot); cfree_cg_push_int(p->cg, (uint64_t)value, ty); - cfree_cg_store(p->cg, toy_mem_access(p, ty)); + cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); if (!toy_parser_expect(p, TOK_SEMI)) { toy_error(p, p->cur.loc, "expected ';' after let"); return 0; @@ -946,7 +956,7 @@ static int toy_parse_let_stmt(ToyParser* p) { } else { cfree_cg_push_local(p->cg, slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, ty)); + cfree_cg_store(p->cg, toy_mem_access(p, ty), (CfreeCgEffAddr){0, 0}); } } if (!toy_parser_expect(p, TOK_SEMI)) { @@ -1051,7 +1061,7 @@ static int toy_parse_switch_stmt_named(ToyParser* p, CfreeSym label_name) { selector_slot = cfree_cg_local(p->cg, selector_ty, toy_slot_attrs(0)); cfree_cg_push_local(p->cg, selector_slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, selector_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0}); end_label = cfree_cg_label_new(p->cg); dispatch_label = cfree_cg_label_new(p->cg); if (!toy_parser_reserve(p, (void**)&p->scopes, &p->cap_scopes, @@ -1116,7 +1126,7 @@ static int 
toy_parse_switch_stmt_named(ToyParser* p, CfreeSym label_name) { } cfree_cg_label_place(p->cg, dispatch_label); cfree_cg_push_local(p->cg, selector_slot); - cfree_cg_load(p->cg, toy_mem_access(p, selector_ty)); + cfree_cg_load(p->cg, toy_mem_access(p, selector_ty), (CfreeCgEffAddr){0, 0}); sw.selector_type = selector_ty; sw.default_label = default_arm_label != CFREE_CG_LABEL_NONE ? default_arm_label @@ -1407,7 +1417,7 @@ static int toy_parse_return_stmt(ToyParser* p) { } } cfree_cg_push_local(p->cg, slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->cur_fn_ret)); + cfree_cg_load(p->cg, toy_mem_access(p, p->cur_fn_ret), (CfreeCgEffAddr){0, 0}); cfree_cg_ret(p->cg); if (!toy_parser_expect(p, TOK_SEMI)) { toy_error(p, p->cur.loc, "expected ';' after return"); @@ -1504,6 +1514,10 @@ static int toy_parse_stmt(ToyParser* p) { int root_mutable = 1; int lhs_slice_metadata = 0; toy_parser_advance(p); + /* Chain invariant: TOS holds a pointer-rvalue of type `*lhs_ty`. + * Intermediate field / index / dereference steps materialize the + * address via toy_addr_offset / toy_addr_index / explicit loads. + * The final store consumes the pointer with EA {0, 0}. 
*/ { ToyVar* v = toy_find_var(p, name); ToyGlobal* g = toy_find_global(p, name); @@ -1516,17 +1530,14 @@ static int toy_parse_stmt(ToyParser* p) { lhs_toy_type = g->toy_type; root_mutable = g->mutable; } - if (v && (cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_ARRAY || - cfree_cg_type_kind(p->c, v->type) == CFREE_CG_TYPE_RECORD)) { - toy_push_var_lvalue(p, v); + if (v) { + toy_push_var_addr(p, v); lhs_ty = v->type; - } else if (g && - (cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_ARRAY || - cfree_cg_type_kind(p->c, g->type) == CFREE_CG_TYPE_RECORD)) { - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + } else if (g) { + cfree_cg_push_symbol_addr(p->cg, g->sym, 0); lhs_ty = g->type; } else { - lhs_ty = toy_push_named_rvalue(p, name); + lhs_ty = CFREE_CG_TYPE_NONE; } } if (lhs_ty == CFREE_CG_TYPE_NONE) { @@ -1548,26 +1559,42 @@ static int toy_parse_stmt(ToyParser* p) { return 0; } if (cfree_cg_type_kind(p->c, lhs_ty) == CFREE_CG_TYPE_PTR) { + /* TOS = **T_chain (where lhs_ty = *T_chain). Load the pointer + * value so the index applies to the pointee. */ CfreeCgTypeId pointee = cfree_cg_type_ptr_pointee(p->c, lhs_ty); ToyTypeId source_pointee = toy_type_pointee(p, lhs_toy_type); - if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) { + /* index is currently on top; stash so we can load the pointer. 
*/ + { CfreeCgLocal idx_slot = cfree_cg_local(p->cg, p->int_type, toy_slot_attrs(0)); cfree_cg_push_local(p->cg, idx_slot); cfree_cg_swap(p->cg); - cfree_cg_store(p->cg, toy_mem_access(p, p->int_type)); - cfree_cg_indirect(p->cg); + cfree_cg_store(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + cfree_cg_load(p->cg, toy_mem_access(p, lhs_ty), + (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(p->cg, idx_slot); - cfree_cg_load(p->cg, toy_mem_access(p, p->int_type)); - lhs_ty = cfree_cg_type_array_elem(p->c, pointee); + cfree_cg_load(p->cg, toy_mem_access(p, p->int_type), + (CfreeCgEffAddr){0, 0}); + } + if (cfree_cg_type_kind(p->c, pointee) == CFREE_CG_TYPE_ARRAY) { + CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, pointee); + lhs_ty = elem_ty; lhs_toy_type = toy_type_array_elem(p, source_pointee); + toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty), + cfree_cg_type_ptr(p->c, elem_ty, 0)); } else { lhs_ty = pointee; lhs_toy_type = source_pointee; + toy_addr_index(p, cfree_cg_type_size(p->c, lhs_ty), + cfree_cg_type_ptr(p->c, lhs_ty, 0)); } } else if (cfree_cg_type_kind(p->c, lhs_ty) == CFREE_CG_TYPE_ARRAY) { - lhs_ty = cfree_cg_type_array_elem(p->c, lhs_ty); + CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, lhs_ty); + lhs_ty = elem_ty; lhs_toy_type = toy_type_array_elem(p, lhs_toy_type); + toy_addr_index(p, cfree_cg_type_size(p->c, elem_ty), + cfree_cg_type_ptr(p->c, elem_ty, 0)); } else if (toy_type_is_slice(p, lhs_toy_type)) { lhs_ty = toy_emit_slice_index_lvalue(p, lhs_ty, lhs_toy_type, &lhs_toy_type); @@ -1576,7 +1603,6 @@ static int toy_parse_stmt(ToyParser* p) { toy_error(p, p->cur.loc, "cannot index non-array/non-pointer"); return 0; } - cfree_cg_index(p->cg, 0); continue; } if (toy_parser_match(p, TOK_DOTSTAR)) { @@ -1585,22 +1611,27 @@ static int toy_parse_stmt(ToyParser* p) { toy_error(p, p->cur.loc, "cannot dereference non-pointer"); return 0; } + /* TOS = `**T`; load to TOS = `*T`. 
*/ + cfree_cg_load(p->cg, toy_mem_access(p, lhs_ty), + (CfreeCgEffAddr){0, 0}); lhs_ty = cfree_cg_type_ptr_pointee(p->c, lhs_ty); lhs_toy_type = toy_type_pointee(p, lhs_toy_type); - cfree_cg_indirect(p->cg); continue; } if (toy_parser_match(p, TOK_DOT)) { CfreeCgField field; uint32_t field_index = 0; ToyNamedType* named; + uint64_t foff = 0; if (cfree_cg_type_kind(p->c, lhs_ty) == CFREE_CG_TYPE_PTR && cfree_cg_type_kind(p->c, cfree_cg_type_ptr_pointee(p->c, lhs_ty)) == CFREE_CG_TYPE_RECORD) { + /* `p.field`: load the pointer value to address the record. */ + cfree_cg_load(p->cg, toy_mem_access(p, lhs_ty), + (CfreeCgEffAddr){0, 0}); lhs_ty = cfree_cg_type_ptr_pointee(p->c, lhs_ty); lhs_toy_type = toy_type_pointee(p, lhs_toy_type); - cfree_cg_indirect(p->cg); } if (cfree_cg_type_kind(p->c, lhs_ty) != CFREE_CG_TYPE_RECORD) { toy_error(p, p->cur.loc, "field assignment on non-record"); @@ -1617,7 +1648,7 @@ static int toy_parse_stmt(ToyParser* p) { field_index = (uint32_t)p->cur.int_value; toy_parser_advance(p); if (cfree_cg_type_record_field(p->c, lhs_ty, field_index, &field, - NULL) != 0) + &foff) != 0) return 0; } else { CfreeSym field_name; @@ -1632,14 +1663,17 @@ static int toy_parse_stmt(ToyParser* p) { toy_error(p, p->cur.loc, "unknown record field"); return 0; } + if (cfree_cg_type_record_field(p->c, lhs_ty, field_index, NULL, &foff) != 0) + return 0; } - cfree_cg_field(p->cg, field_index); lhs_slice_metadata = toy_type_is_slice(p, lhs_toy_type) && (field_index == 0 || field_index == 1); lhs_ty = field.type; lhs_toy_type = (named && field_index < named->nfields) ? 
named->fields[field_index].toy_type : TOY_TYPE_NONE; + toy_addr_offset(p, (int64_t)foff, + cfree_cg_type_ptr(p->c, lhs_ty, 0)); continue; } break; @@ -1672,7 +1706,7 @@ static int toy_parse_stmt(ToyParser* p) { toy_error(p, p->cur.loc, "type mismatch in assignment"); return 0; } - cfree_cg_store(p->cg, toy_mem_access(p, lhs_ty)); + cfree_cg_store(p->cg, toy_mem_access(p, lhs_ty), (CfreeCgEffAddr){0, 0}); } if (!toy_parser_expect(p, TOK_SEMI)) { toy_error(p, p->cur.loc, "expected ';' after assignment"); @@ -1712,7 +1746,7 @@ static int toy_parse_stmt(ToyParser* p) { toy_push_var_lvalue(p, v); cfree_cg_swap(p->cg); if (expr_ty != v->type) cfree_cg_bitcast(p->cg, v->type); - cfree_cg_store(p->cg, toy_mem_access(p, v->type)); + cfree_cg_store(p->cg, toy_mem_access(p, v->type), (CfreeCgEffAddr){0, 0}); } else { ToyGlobal* g = toy_find_global(p, name); if (!g) { @@ -1736,10 +1770,10 @@ static int toy_parse_stmt(ToyParser* p) { } return 1; } - cfree_cg_push_symbol_lvalue(p->cg, g->sym, 0); + cfree_cg_push_symbol_addr(p->cg, g->sym, 0); cfree_cg_swap(p->cg); if (expr_ty != g->type) cfree_cg_bitcast(p->cg, g->type); - cfree_cg_store(p->cg, toy_mem_access(p, g->type)); + cfree_cg_store(p->cg, toy_mem_access(p, g->type), (CfreeCgEffAddr){0, 0}); } } if (!toy_parser_expect(p, TOK_SEMI)) { diff --git a/lang/toy/symbols.c b/lang/toy/symbols.c @@ -191,9 +191,12 @@ ToyScope* toy_find_innermost_loop_scope(ToyParser* p) { return NULL; } +/* Pushes a base operand suitable for the canonical memops: for a frame-local + * this is the lvalue; for a static-local this is the symbol address (the + * memop accepts both shapes as `base`). 
*/ void toy_push_var_lvalue(ToyParser* p, const ToyVar* v) { if (v->is_static) - cfree_cg_push_symbol_lvalue(p->cg, v->static_sym, 0); + cfree_cg_push_symbol_addr(p->cg, v->static_sym, 0); else cfree_cg_push_local(p->cg, v->local); } @@ -205,6 +208,43 @@ void toy_push_var_addr(ToyParser* p, const ToyVar* v) { cfree_cg_push_local_addr(p->cg, v->local); } +/* Address-chain helpers for the chained-lvalue paths in the toy frontend. + * + * The canonical CG memops carry only a single effective address. Field and + * index selectors that immediately precede a load/store therefore fold into + * the memop's `ea` directly. When the toy parser builds a multi-step chain + * (e.g. `&a.f[i].g`), each intermediate step must materialize the address + * with explicit pointer arithmetic so the next step sees a pointer rvalue + * it can compose with again. These helpers do that materialization. + * + * Inputs are pointer rvalues; outputs are pointer rvalues. Chain starts + * push the root pointer via `cfree_cg_push_local_addr` or + * `cfree_cg_push_symbol_addr` (or `cfree_cg_addr` after `push_local`). */ + +/* TOS: [base_ptr]. After: [base_ptr + offset] as `result_ptr_ty`. */ +void toy_addr_offset(ToyParser* p, int64_t offset, CfreeCgTypeId result_ptr_ty) { + if (offset != 0) { + cfree_cg_ptr_to_int(p->cg, p->int_type); + cfree_cg_push_int(p->cg, (uint64_t)offset, p->int_type); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, 0); + cfree_cg_int_to_ptr(p->cg, result_ptr_ty); + } else { + cfree_cg_bitcast(p->cg, result_ptr_ty); + } +} + +/* TOS: [base_ptr, index]. + * After: [base_ptr + index * elem_size] as `result_ptr_ty`. 
*/ +void toy_addr_index(ToyParser* p, uint64_t elem_size, + CfreeCgTypeId result_ptr_ty) { + cfree_cg_push_int(p->cg, elem_size, p->int_type); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_MUL, 0); + cfree_cg_swap(p->cg); + cfree_cg_ptr_to_int(p->cg, p->int_type); + cfree_cg_int_binop(p->cg, CFREE_CG_INT_ADD, 0); + cfree_cg_int_to_ptr(p->cg, result_ptr_ty); +} + CfreeCgSym toy_find_decl_sym(ToyParser* p, CfreeSym name) { ToyGlobal* g = toy_find_global(p, name); if (g) return g->sym; diff --git a/lang/wasm/cg.c b/lang/wasm/cg.c @@ -34,6 +34,15 @@ static CfreeCgMemAccess wasm_cg_mem_type(CfreeCgTypeId ty) { return mem; } +/* Convenience: zero effective-address (no offset, no index). Used for + * load/store on a base TOS that already represents the exact address. */ +static CfreeCgEffAddr wasm_cg_ea0(void) { + CfreeCgEffAddr ea; + ea.offset = 0; + ea.scale = 0; + return ea; +} + static void wasm_cg_push_zero(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b, WasmValType vt) { CfreeCgTypeId ty = wasm_cg_type(c, b, vt); @@ -105,22 +114,26 @@ typedef struct WasmCgRuntime { CfreeCgTypeId table_ty; CfreeCgTypeId instance_ty; CfreeCgTypeId instance_ptr_ty; - uint32_t memory_field[64]; - uint32_t memory_data_field; - uint32_t memory_pages_field; - uint32_t memory_max_pages_field; - uint32_t memory_flags_field; - uint32_t func_import_field[64]; - uint32_t func_ref_entry_field[64]; - uint32_t global_field[64]; - uint32_t global_import_addr_field; - uint32_t table_field[64]; - uint32_t table_entries_field[64]; - uint32_t table_entry_fn_field; - uint32_t table_entry_typeidx_field; - uint32_t table_entries_ptr_field; - uint32_t table_len_field; - uint32_t table_max_field; + uint64_t table_entry_size; + /* Byte offsets within the instance struct for each top-level slot. 
*/ + uint64_t memory_offset[64]; + uint64_t func_import_offset[64]; + uint64_t func_ref_entry_offset[64]; + uint64_t global_offset[64]; + uint64_t table_offset[64]; + uint64_t table_entries_offset[64]; + /* Byte offsets within their containing record. */ + uint64_t memory_data_offset; + uint64_t memory_pages_offset; + uint64_t memory_max_pages_offset; + uint64_t memory_flags_offset; + uint64_t func_import_fn_offset; + uint64_t global_import_addr_offset; + uint64_t table_entries_ptr_offset; + uint64_t table_len_offset; + uint64_t table_max_offset; + uint64_t table_entry_fn_offset; + uint64_t table_entry_typeidx_offset; CfreeCgTypeId trap_func_ty; CfreeCgSym trap_syms[WASM_TRAP_COUNT]; } WasmCgRuntime; @@ -192,6 +205,15 @@ static void wasm_indexed_name(char* name, size_t cap, const char* prefix, name[pos] = '\0'; } +static uint64_t wasm_cg_field_offset(CfreeCompiler* c, CfreeCgTypeId ty, + uint32_t index) { + uint64_t off = 0; + CfreeStatus st = cfree_cg_type_record_field(c, ty, index, NULL, &off); + if (st != CFREE_OK) + wasm_error(c, wasm_loc(0, 0), "wasm: failed to query field offset"); + return off; +} + static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, const WasmModule* m, WasmCgRuntime* rt) { CfreeCgField memory_fields[4]; @@ -201,6 +223,12 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, CfreeCgField table_fields[3]; CfreeCgField instance_fields[256]; uint32_t nfields = 0; + uint32_t memory_field_idx[64]; + uint32_t func_import_field_idx[64]; + uint32_t func_ref_entry_field_idx[64]; + uint32_t global_field_idx[64]; + uint32_t table_field_idx[64]; + uint32_t table_entries_field_idx[64]; memset(rt, 0, sizeof *rt); rt->i8_ptr_ty = cfree_cg_type_ptr(c, b.id[CFREE_CG_BUILTIN_I8], 0); rt->void_ptr_ty = rt->i8_ptr_ty; @@ -215,20 +243,23 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, memory_fields[3].type = b.id[CFREE_CG_BUILTIN_I32]; rt->memory_ty = cfree_cg_type_record( c, 
cfree_sym_intern(c, "CfreeWasmMemory"), memory_fields, 4); - rt->memory_data_field = 0; - rt->memory_pages_field = 1; - rt->memory_max_pages_field = 2; - rt->memory_flags_field = 3; + rt->memory_data_offset = wasm_cg_field_offset(c, rt->memory_ty, 0); + rt->memory_pages_offset = wasm_cg_field_offset(c, rt->memory_ty, 1); + rt->memory_max_pages_offset = wasm_cg_field_offset(c, rt->memory_ty, 2); + rt->memory_flags_offset = wasm_cg_field_offset(c, rt->memory_ty, 3); memset(func_import_fields, 0, sizeof func_import_fields); func_import_fields[0].name = cfree_sym_intern(c, "fn"); func_import_fields[0].type = rt->void_ptr_ty; rt->func_import_ty = cfree_cg_type_record( c, cfree_sym_intern(c, "CfreeWasmFuncImport"), func_import_fields, 1); + rt->func_import_fn_offset = wasm_cg_field_offset(c, rt->func_import_ty, 0); memset(global_import_fields, 0, sizeof global_import_fields); global_import_fields[0].name = cfree_sym_intern(c, "addr"); global_import_fields[0].type = rt->void_ptr_ty; rt->global_import_ty = cfree_cg_type_record( c, cfree_sym_intern(c, "CfreeWasmGlobalImport"), global_import_fields, 1); + rt->global_import_addr_offset = + wasm_cg_field_offset(c, rt->global_import_ty, 0); memset(table_entry_fields, 0, sizeof table_entry_fields); table_entry_fields[0].name = cfree_sym_intern(c, "fn"); table_entry_fields[0].type = rt->void_ptr_ty; @@ -237,8 +268,10 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, rt->table_entry_ty = cfree_cg_type_record( c, cfree_sym_intern(c, "CfreeWasmTableEntry"), table_entry_fields, 2); rt->table_entry_ptr_ty = cfree_cg_type_ptr(c, rt->table_entry_ty, 0); - rt->table_entry_fn_field = 0; - rt->table_entry_typeidx_field = 1; + rt->table_entry_fn_offset = wasm_cg_field_offset(c, rt->table_entry_ty, 0); + rt->table_entry_typeidx_offset = + wasm_cg_field_offset(c, rt->table_entry_ty, 1); + rt->table_entry_size = cfree_cg_type_size(c, rt->table_entry_ty); memset(table_fields, 0, sizeof table_fields); 
table_fields[0].name = cfree_sym_intern(c, "entries"); table_fields[0].type = rt->table_entry_ptr_ty; @@ -248,9 +281,9 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, table_fields[2].type = b.id[CFREE_CG_BUILTIN_I32]; rt->table_ty = cfree_cg_type_record(c, cfree_sym_intern(c, "CfreeWasmTable"), table_fields, 3); - rt->table_entries_ptr_field = 0; - rt->table_len_field = 1; - rt->table_max_field = 2; + rt->table_entries_ptr_offset = wasm_cg_field_offset(c, rt->table_ty, 0); + rt->table_len_offset = wasm_cg_field_offset(c, rt->table_ty, 1); + rt->table_max_offset = wasm_cg_field_offset(c, rt->table_ty, 2); memset(instance_fields, 0, sizeof instance_fields); for (uint32_t i = 0; i < m->nmemories; ++i) { char name[40]; @@ -260,7 +293,7 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, memcpy(name, "memory", sizeof "memory"); else wasm_indexed_name(name, sizeof name, "memory_", i); - rt->memory_field[i] = nfields; + memory_field_idx[i] = nfields; instance_fields[nfields].name = cfree_sym_intern(c, name); instance_fields[nfields].type = rt->memory_ty; nfields++; @@ -271,7 +304,7 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, if (nfields >= 256u) wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large"); wasm_indexed_name(name, sizeof name, "import_func_", i); - rt->func_import_field[i] = nfields; + func_import_field_idx[i] = nfields; instance_fields[nfields].name = cfree_sym_intern(c, name); instance_fields[nfields].type = rt->func_import_ty; nfields++; @@ -281,7 +314,7 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, if (nfields >= 256u) wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large"); wasm_indexed_name(name, sizeof name, "func_ref_", i); - rt->func_ref_entry_field[i] = nfields; + func_ref_entry_field_idx[i] = nfields; instance_fields[nfields].name = cfree_sym_intern(c, name); instance_fields[nfields].type = rt->table_entry_ty; 
nfields++; @@ -293,26 +326,25 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, wasm_indexed_name(name, sizeof name, m->globals[i].is_import ? "import_global_" : "global_", i); - rt->global_field[i] = nfields; + global_field_idx[i] = nfields; instance_fields[nfields].name = cfree_sym_intern(c, name); instance_fields[nfields].type = m->globals[i].is_import ? rt->global_import_ty : wasm_cg_type(c, b, m->globals[i].type); nfields++; } - rt->global_import_addr_field = 0; for (uint32_t i = 0; i < m->ntables; ++i) { char name[40]; uint32_t max = m->tables[i].has_max ? m->tables[i].max : m->tables[i].min; if (nfields + 2u > 256u) wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large"); wasm_indexed_name(name, sizeof name, "table_", i); - rt->table_field[i] = nfields; + table_field_idx[i] = nfields; instance_fields[nfields].name = cfree_sym_intern(c, name); instance_fields[nfields].type = rt->table_ty; nfields++; wasm_indexed_name(name, sizeof name, "table_entries_", i); - rt->table_entries_field[i] = nfields; + table_entries_field_idx[i] = nfields; instance_fields[nfields].name = cfree_sym_intern(c, name); instance_fields[nfields].type = cfree_cg_type_array(c, rt->table_entry_ty, max ? 
max : 1u); @@ -321,87 +353,94 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, rt->instance_ty = cfree_cg_type_record( c, cfree_sym_intern(c, "CfreeWasmInstance"), instance_fields, nfields); rt->instance_ptr_ty = cfree_cg_type_ptr(c, rt->instance_ty, 0); + for (uint32_t i = 0; i < m->nmemories; ++i) + rt->memory_offset[i] = + wasm_cg_field_offset(c, rt->instance_ty, memory_field_idx[i]); + for (uint32_t i = 0; i < m->nfuncs; ++i) { + if (m->funcs[i].is_import) + rt->func_import_offset[i] = + wasm_cg_field_offset(c, rt->instance_ty, func_import_field_idx[i]); + rt->func_ref_entry_offset[i] = + wasm_cg_field_offset(c, rt->instance_ty, func_ref_entry_field_idx[i]); + } + for (uint32_t i = 0; i < m->nglobals; ++i) + rt->global_offset[i] = + wasm_cg_field_offset(c, rt->instance_ty, global_field_idx[i]); + for (uint32_t i = 0; i < m->ntables; ++i) { + rt->table_offset[i] = + wasm_cg_field_offset(c, rt->instance_ty, table_field_idx[i]); + rt->table_entries_offset[i] = + wasm_cg_field_offset(c, rt->instance_ty, table_entries_field_idx[i]); + } } -static void wasm_cg_push_instance_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local) { +/* Push the instance pointer rvalue (loaded from the instance parameter slot). + * Stack: [] -> [void*]. Callers fold a struct offset into the memop's EA. 
*/ +static void wasm_cg_push_instance_ptr(CfreeCg* cg, const WasmCgRuntime* rt, + CfreeCgLocal instance_local) { + CfreeCgEffAddr ea = {0, 0}; cfree_cg_push_local(cg, instance_local); - cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty)); - cfree_cg_indirect(cg); + cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty), ea); } -static void wasm_cg_push_memory_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t memidx) { - wasm_cg_push_instance_lvalue(cg, rt, instance_local); - cfree_cg_field(cg, rt->memory_field[memidx]); +/* Add a constant byte offset to the pointer rvalue on TOS, retyping to + * `result_ptr_ty`. When offset == 0 no add is emitted; TOS is only bitcast + * to `result_ptr_ty`. */ +static void wasm_cg_ptr_add_offset(CfreeCg* cg, CfreeCgBuiltinTypes b, + uint64_t offset, + CfreeCgTypeId result_ptr_ty) { + CfreeCgTypeId i64_ty = b.id[CFREE_CG_BUILTIN_I64]; + if (offset == 0) { + cfree_cg_bitcast(cg, result_ptr_ty); + return; + } + cfree_cg_ptr_to_int(cg, i64_ty); + cfree_cg_push_int(cg, offset, i64_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_int_to_ptr(cg, result_ptr_ty); } +/* Load the i8* data pointer from instance->memories[memidx].data. 
*/ static void wasm_cg_push_memory_data_ptr(CfreeCg* cg, const WasmCgRuntime* rt, CfreeCgLocal instance_local, uint32_t memidx) { - wasm_cg_push_memory_lvalue(cg, rt, instance_local, memidx); - cfree_cg_field(cg, rt->memory_data_field); - cfree_cg_load(cg, wasm_cg_mem_type(rt->i8_ptr_ty)); -} - -static void wasm_cg_push_memory_pages_lvalue(CfreeCg* cg, - const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t memidx) { - wasm_cg_push_memory_lvalue(cg, rt, instance_local, memidx); - cfree_cg_field(cg, rt->memory_pages_field); -} - -static void wasm_cg_push_memory_max_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t memidx) { - wasm_cg_push_memory_lvalue(cg, rt, instance_local, memidx); - cfree_cg_field(cg, rt->memory_max_pages_field); -} - -static void wasm_cg_push_global_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t global_index) { - wasm_cg_push_instance_lvalue(cg, rt, instance_local); - cfree_cg_field(cg, rt->global_field[global_index]); -} - -static void wasm_cg_push_import_func_lvalue(CfreeCg* cg, - const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t func_index) { - wasm_cg_push_instance_lvalue(cg, rt, instance_local); - cfree_cg_field(cg, rt->func_import_field[func_index]); + CfreeCgEffAddr ea; + ea.offset = + (int64_t)(rt->memory_offset[memidx] + rt->memory_data_offset); + ea.scale = 0; + wasm_cg_push_instance_ptr(cg, rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem_type(rt->i8_ptr_ty), ea); } +/* Load instance->import_funcs[func_index].fn as a void*. 
*/ static void wasm_cg_push_import_func_ptr(CfreeCg* cg, const WasmCgRuntime* rt, CfreeCgLocal instance_local, uint32_t func_index) { - wasm_cg_push_import_func_lvalue(cg, rt, instance_local, func_index); - cfree_cg_field(cg, 0); - cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty)); + CfreeCgEffAddr ea; + ea.offset = (int64_t)(rt->func_import_offset[func_index] + + rt->func_import_fn_offset); + ea.scale = 0; + wasm_cg_push_instance_ptr(cg, rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), ea); } -static void wasm_cg_push_func_ref_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t func_index) { - wasm_cg_push_instance_lvalue(cg, rt, instance_local); - cfree_cg_field(cg, rt->func_ref_entry_field[func_index]); -} - -static void wasm_cg_push_global_value_lvalue(CfreeCompiler* c, CfreeCg* cg, - CfreeCgBuiltinTypes b, - const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - const WasmModule* m, - uint32_t global_index) { - wasm_cg_push_global_lvalue(cg, rt, instance_local, global_index); +/* Push a pointer rvalue to instance->globals[global_index]'s value cell, + * dereferencing the import indirection if needed. Result type is T* where T is + * the global's value type. 
*/ +static void wasm_cg_push_global_value_ptr(CfreeCompiler* c, CfreeCg* cg, + CfreeCgBuiltinTypes b, + const WasmCgRuntime* rt, + CfreeCgLocal instance_local, + const WasmModule* m, + uint32_t global_index) { + CfreeCgTypeId ptr_ty = cfree_cg_type_ptr( + c, wasm_cg_type(c, b, m->globals[global_index].type), 0); if (m->globals[global_index].is_import) { - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr( - c, wasm_cg_type(c, b, m->globals[global_index].type), 0); - cfree_cg_field(cg, rt->global_import_addr_field); - cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty)); + CfreeCgEffAddr ea; + ea.offset = (int64_t)(rt->global_offset[global_index] + + rt->global_import_addr_offset); + ea.scale = 0; + wasm_cg_push_instance_ptr(cg, rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), ea); cfree_cg_dup(cg); cfree_cg_push_null(cg, rt->void_ptr_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); @@ -412,37 +451,39 @@ static void wasm_cg_push_global_value_lvalue(CfreeCompiler* c, CfreeCg* cg, cfree_cg_label_place(cg, ok); } cfree_cg_bitcast(cg, ptr_ty); - cfree_cg_indirect(cg); + } else { + wasm_cg_push_instance_ptr(cg, rt, instance_local); + wasm_cg_ptr_add_offset(cg, b, rt->global_offset[global_index], ptr_ty); } } -static void wasm_cg_push_table_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t table_index) { - wasm_cg_push_instance_lvalue(cg, rt, instance_local); - cfree_cg_field(cg, rt->table_field[table_index]); -} - -static void wasm_cg_push_table_entries_array_lvalue(CfreeCg* cg, - const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t table_index) { - wasm_cg_push_instance_lvalue(cg, rt, instance_local); - cfree_cg_field(cg, rt->table_entries_field[table_index]); -} - -static void wasm_cg_push_table_entry_lvalue(CfreeCg* cg, - const WasmCgRuntime* rt, - CfreeCgLocal instance_local, - uint32_t table_index, - CfreeCgLocal index_local, - CfreeCgMemAccess index_mem) { - wasm_cg_push_table_lvalue(cg, rt, 
instance_local, table_index); - cfree_cg_field(cg, rt->table_entries_ptr_field); - cfree_cg_load(cg, wasm_cg_mem_type(rt->table_entry_ptr_ty)); +/* Push a pointer rvalue to instance->tables[table_index].entries[index_local]. + * The index is loaded from a temp local supplied by the caller (mirrors the + * previous helper's signature). */ +static void wasm_cg_push_table_entry_ptr(CfreeCg* cg, CfreeCgBuiltinTypes b, + const WasmCgRuntime* rt, + CfreeCgLocal instance_local, + uint32_t table_index, + CfreeCgLocal index_local, + CfreeCgMemAccess index_mem) { + CfreeCgEffAddr ea; + CfreeCgEffAddr ea_idx = {0, 0}; + CfreeCgTypeId i64_ty = b.id[CFREE_CG_BUILTIN_I64]; + /* Load entries pointer from instance->tables[i].entries. */ + ea.offset = (int64_t)(rt->table_offset[table_index] + + rt->table_entries_ptr_offset); + ea.scale = 0; + wasm_cg_push_instance_ptr(cg, rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem_type(rt->table_entry_ptr_ty), ea); + /* Compute entries + index * sizeof(entry) into a pointer rvalue. */ + cfree_cg_ptr_to_int(cg, i64_ty); cfree_cg_push_local(cg, index_local); - cfree_cg_load(cg, index_mem); - cfree_cg_index(cg, 0); + cfree_cg_load(cg, index_mem, ea_idx); + cfree_cg_zext(cg, i64_ty); + cfree_cg_push_int(cg, rt->table_entry_size, i64_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_MUL, 0); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_int_to_ptr(cg, rt->table_entry_ptr_ty); } static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg, @@ -453,6 +494,7 @@ static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg, uint32_t width = wasm_mem_width(in->kind); uint64_t end = in->offset64 + width; CfreeCgLabel ok = cfree_cg_label_new(cg); + CfreeCgEffAddr pages_ea; uint32_t max_pages = (uint32_t)(m->memories[in->memidx].has_max ? 
m->memories[in->memidx].max_pages : m->memories[in->memidx].min_pages); @@ -462,8 +504,11 @@ static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg, } (void)c; cfree_cg_dup(cg); - wasm_cg_push_memory_pages_lvalue(cg, rt, instance_local, in->memidx); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + pages_ea.offset = + (int64_t)(rt->memory_offset[in->memidx] + rt->memory_pages_offset); + pages_ea.scale = 0; + wasm_cg_push_instance_ptr(cg, rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), pages_ea); cfree_cg_push_int(cg, 65536u, b.id[CFREE_CG_BUILTIN_I64]); cfree_cg_int_binop(cg, CFREE_CG_INT_MUL, 0); cfree_cg_push_int(cg, end, b.id[CFREE_CG_BUILTIN_I64]); @@ -479,12 +524,25 @@ static void wasm_cg_memory_check(CfreeCompiler* c, CfreeCg* cg, cfree_cg_label_place(cg, ok); } -static void wasm_cg_memory_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, - CfreeCgLocal instance_local, uint32_t memidx, - uint64_t offset) { +/* Compute the absolute address (data_ptr + addr_on_tos + offset) as a + * pointer rvalue. Stack: [addr] -> [void*]. */ +static void wasm_cg_memory_addr_from_tos(CfreeCg* cg, CfreeCgBuiltinTypes b, + const WasmCgRuntime* rt, + const WasmModule* m, + CfreeCgLocal instance_local, + uint32_t memidx, uint64_t offset) { + CfreeCgTypeId i64_ty = b.id[CFREE_CG_BUILTIN_I64]; + /* TOS: [addr]. */ + if (!m->memories[memidx].is64) cfree_cg_zext(cg, i64_ty); + if (offset) { + cfree_cg_push_int(cg, offset, i64_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + } + /* TOS: [addr + offset]. 
*/ wasm_cg_push_memory_data_ptr(cg, rt, instance_local, memidx); - cfree_cg_swap(cg); - cfree_cg_index(cg, offset); + cfree_cg_ptr_to_int(cg, i64_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_int_to_ptr(cg, rt->i8_ptr_ty); } static void wasm_cg_rotate(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b, @@ -500,24 +558,24 @@ static void wasm_cg_rotate(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b, lhs = cfree_cg_local(cg, ty, attrs); cfree_cg_push_local(cg, rhs); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, lhs); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, lhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, rhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_int(cg, mask, ty); cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0); cfree_cg_int_binop(cg, right ? 
CFREE_CG_INT_LSHR : CFREE_CG_INT_SHL, 0); cfree_cg_push_local(cg, lhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_int(cg, 0, ty); cfree_cg_push_local(cg, rhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_int_binop(cg, CFREE_CG_INT_SUB, 0); cfree_cg_push_int(cg, mask, ty); cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0); @@ -540,12 +598,12 @@ static void wasm_cg_checked_divrem(CfreeCompiler* c, CfreeCg* cg, lhs = cfree_cg_local(cg, ty, attrs); cfree_cg_push_local(cg, rhs); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, lhs); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, rhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_int(cg, 0, ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); cfree_cg_branch_true(cg, ok); @@ -556,12 +614,12 @@ static void wasm_cg_checked_divrem(CfreeCompiler* c, CfreeCg* cg, uint64_t min_val = vt == WASM_VAL_I32 ? 
UINT64_C(0x80000000) : UINT64_C(0x8000000000000000); cfree_cg_push_local(cg, lhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_int(cg, min_val, ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); cfree_cg_branch_true(cg, no_overflow); cfree_cg_push_local(cg, rhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_int(cg, UINT64_MAX, ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); cfree_cg_branch_true(cg, no_overflow); @@ -569,9 +627,9 @@ static void wasm_cg_checked_divrem(CfreeCompiler* c, CfreeCg* cg, cfree_cg_label_place(cg, no_overflow); } cfree_cg_push_local(cg, lhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, rhs); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_int_binop(cg, op, 0); } @@ -601,25 +659,25 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg, abs_bits = cfree_cg_local(cg, bit_ty, attrs); cfree_cg_push_local(cg, value); cfree_cg_swap(cg); - cfree_cg_store(cg, src_mem); + cfree_cg_store(cg, src_mem, wasm_cg_ea0()); cfree_cg_push_local(cg, bits); cfree_cg_push_local(cg, value); - cfree_cg_load(cg, src_mem); + cfree_cg_load(cg, src_mem, wasm_cg_ea0()); cfree_cg_bitcast(cg, bit_ty); - cfree_cg_store(cg, bit_mem); + cfree_cg_store(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_local(cg, abs_bits); cfree_cg_push_local(cg, bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, abs_mask, bit_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0); - cfree_cg_store(cg, bit_mem); + cfree_cg_store(cg, bit_mem, wasm_cg_ea0()); { CfreeCgLabel finite = cfree_cg_label_new(cg); cfree_cg_push_local(cg, abs_bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, inf_bits, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_LE_U); cfree_cg_branch_true(cg, finite); @@ -644,12 +702,12 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* 
cg, if (is_unsigned) { CfreeCgLabel nonnegative = cfree_cg_label_new(cg); cfree_cg_push_local(cg, abs_bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, 0, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); cfree_cg_branch_true(cg, nonnegative); cfree_cg_push_local(cg, bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, sign_mask, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U); cfree_cg_branch_true(cg, nonnegative); @@ -659,7 +717,7 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg, { CfreeCgLabel in_range = cfree_cg_label_new(cg); cfree_cg_push_local(cg, abs_bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, limit_bits, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U); cfree_cg_branch_true(cg, in_range); @@ -670,19 +728,19 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg, CfreeCgLabel negative = cfree_cg_label_new(cg); CfreeCgLabel in_range = cfree_cg_label_new(cg); cfree_cg_push_local(cg, bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, sign_mask, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_GE_U); cfree_cg_branch_true(cg, negative); cfree_cg_push_local(cg, abs_bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, limit_bits, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U); cfree_cg_branch_true(cg, in_range); wasm_cg_trap_invalid_conversion(cg, rt); cfree_cg_label_place(cg, negative); cfree_cg_push_local(cg, abs_bits); - cfree_cg_load(cg, bit_mem); + cfree_cg_load(cg, bit_mem, wasm_cg_ea0()); cfree_cg_push_int(cg, limit_bits, bit_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_LE_U); cfree_cg_branch_true(cg, in_range); @@ -691,7 +749,7 @@ static void wasm_cg_checked_trunc(CfreeCompiler* c, CfreeCg* cg, } cfree_cg_push_local(cg, value); - cfree_cg_load(cg, src_mem); + 
cfree_cg_load(cg, src_mem, wasm_cg_ea0()); if (is_unsigned) cfree_cg_float_to_uint(cg, dst_ty, CFREE_CG_ROUND_TOWARD_ZERO); else @@ -771,30 +829,30 @@ static void wasm_cg_call_func(CfreeCompiler* c, CfreeCg* cg, cfree_cg_local(cg, wasm_cg_type(c, b, f->params[param]), attrs); cfree_cg_push_local(cg, args[param]); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, f->params[param])); + cfree_cg_store(cg, wasm_cg_mem(c, b, f->params[param]), wasm_cg_ea0()); } if (f->is_import) { CfreeCgLabel ok = cfree_cg_label_new(cg); callee = cfree_cg_local(cg, rt->void_ptr_ty, attrs); cfree_cg_push_local(cg, callee); wasm_cg_push_import_func_ptr(cg, rt, instance_local, func_index); - cfree_cg_store(cg, wasm_cg_mem_type(rt->void_ptr_ty)); + cfree_cg_store(cg, wasm_cg_mem_type(rt->void_ptr_ty), wasm_cg_ea0()); cfree_cg_push_local(cg, callee); - cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), wasm_cg_ea0()); cfree_cg_push_null(cg, rt->void_ptr_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); cfree_cg_branch_true(cg, ok); wasm_cg_trap_table(cg, rt); cfree_cg_label_place(cg, ok); cfree_cg_push_local(cg, callee); - cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty), wasm_cg_ea0()); cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, func_type, 0)); } cfree_cg_push_local(cg, instance_local); - cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt->instance_ptr_ty), wasm_cg_ea0()); for (uint32_t p = 0; p < f->nparams; ++p) { cfree_cg_push_local(cg, args[p]); - cfree_cg_load(cg, wasm_cg_mem(c, b, f->params[p])); + cfree_cg_load(cg, wasm_cg_mem(c, b, f->params[p]), wasm_cg_ea0()); } if (f->is_import) cfree_cg_call( @@ -944,16 +1002,20 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, const WasmMemory* mem = &m->memories[i]; uint64_t max_pages = mem->has_max ? 
mem->max_pages : mem->min_pages; uint32_t flags = (mem->shared ? 1u : 0u) | (mem->is64 ? 2u : 0u); - wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, i); + CfreeCgEffAddr ea; + ea.scale = 0; + ea.offset = (int64_t)(rt.memory_offset[i] + rt.memory_pages_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_int(cg, mem->min_pages, b.id[CFREE_CG_BUILTIN_I64]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); - wasm_cg_push_memory_max_lvalue(cg, &rt, instance_local, i); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea); + ea.offset = (int64_t)(rt.memory_offset[i] + rt.memory_max_pages_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_int(cg, max_pages, b.id[CFREE_CG_BUILTIN_I64]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); - wasm_cg_push_memory_lvalue(cg, &rt, instance_local, i); - cfree_cg_field(cg, rt.memory_flags_field); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea); + ea.offset = (int64_t)(rt.memory_offset[i] + rt.memory_flags_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_int(cg, flags, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea); if (mem->data_init_len) { CfreeCgSym data_sym = cfree_cg_const_data( cg, mem->data, mem->data_init_len, 16, b.id[CFREE_CG_BUILTIN_I8]); @@ -964,38 +1026,45 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, } } for (i = 0; i < m->nfuncs; ++i) { - wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, i); - cfree_cg_field(cg, rt.table_entry_fn_field); + CfreeCgEffAddr ea; + ea.scale = 0; + ea.offset = + (int64_t)(rt.func_ref_entry_offset[i] + rt.table_entry_fn_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); if (m->funcs[i].is_import) { wasm_cg_push_import_func_ptr(cg, &rt, instance_local, i); } else { cfree_cg_push_symbol_addr(cg, syms[i], 0); cfree_cg_bitcast(cg, rt.void_ptr_ty); 
} - cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty)); - wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, i); - cfree_cg_field(cg, rt.table_entry_typeidx_field); + cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty), ea); + ea.offset = (int64_t)(rt.func_ref_entry_offset[i] + + rt.table_entry_typeidx_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_int(cg, m->funcs[i].typeidx, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea); } for (i = 0; i < m->ntables; ++i) { const WasmTable* t = &m->tables[i]; uint32_t max = t->has_max ? t->max : t->min; - wasm_cg_push_table_lvalue(cg, &rt, instance_local, i); - cfree_cg_field(cg, rt.table_entries_ptr_field); - wasm_cg_push_table_entries_array_lvalue(cg, &rt, instance_local, i); - cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_index(cg, 0); - cfree_cg_addr(cg); - cfree_cg_store(cg, wasm_cg_mem_type(rt.table_entry_ptr_ty)); - wasm_cg_push_table_lvalue(cg, &rt, instance_local, i); - cfree_cg_field(cg, rt.table_len_field); + CfreeCgEffAddr ea; + ea.scale = 0; + /* tables[i].entries = &instance->table_entries_arr[i][0]. The address + * of the entries array is instance + table_entries_offset[i]. 
*/ + ea.offset = (int64_t)(rt.table_offset[i] + rt.table_entries_ptr_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + wasm_cg_ptr_add_offset(cg, b, rt.table_entries_offset[i], + rt.table_entry_ptr_ty); + cfree_cg_store(cg, wasm_cg_mem_type(rt.table_entry_ptr_ty), ea); + ea.offset = (int64_t)(rt.table_offset[i] + rt.table_len_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_int(cg, t->min, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); - wasm_cg_push_table_lvalue(cg, &rt, instance_local, i); - cfree_cg_field(cg, rt.table_max_field); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea); + ea.offset = (int64_t)(rt.table_offset[i] + rt.table_max_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_int(cg, max, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea); } for (i = 0; i < m->nelems; ++i) { const WasmElemSegment* seg = &m->elems[i]; @@ -1009,37 +1078,48 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, slot_local = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], tmp_attrs); cfree_cg_push_local(cg, slot_local); cfree_cg_push_int(cg, slot, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); - wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, seg->tableidx, - slot_local, - wasm_cg_mem(c, b, WASM_VAL_I32)); - cfree_cg_field(cg, rt.table_entry_fn_field); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0()); + wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, seg->tableidx, + slot_local, + wasm_cg_mem(c, b, WASM_VAL_I32)); if (m->funcs[funcidx].is_import) { wasm_cg_push_import_func_ptr(cg, &rt, instance_local, funcidx); } else { cfree_cg_push_symbol_addr(cg, syms[funcidx], 0); cfree_cg_bitcast(cg, rt.void_ptr_ty); } - 
cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty)); - wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, seg->tableidx, - slot_local, - wasm_cg_mem(c, b, WASM_VAL_I32)); - cfree_cg_field(cg, rt.table_entry_typeidx_field); + { + CfreeCgEffAddr ea; + ea.scale = 0; + ea.offset = (int64_t)rt.table_entry_fn_offset; + cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty), ea); + } + wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, seg->tableidx, + slot_local, + wasm_cg_mem(c, b, WASM_VAL_I32)); cfree_cg_push_int(cg, m->funcs[funcidx].typeidx, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + { + CfreeCgEffAddr ea; + ea.scale = 0; + ea.offset = (int64_t)rt.table_entry_typeidx_offset; + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), ea); + } } } for (i = 0; i < m->nglobals; ++i) { const WasmGlobal* g = &m->globals[i]; + CfreeCgEffAddr ea; if (g->is_import) continue; - wasm_cg_push_global_lvalue(cg, &rt, instance_local, i); + ea.scale = 0; + ea.offset = (int64_t)rt.global_offset[i]; + wasm_cg_push_instance_ptr(cg, &rt, instance_local); if (g->type == WASM_VAL_F32 || g->type == WASM_VAL_F64) cfree_cg_push_float(cg, g->init.fp, wasm_cg_type(c, b, g->type)); else cfree_cg_push_int(cg, (uint64_t)g->init.imm, wasm_cg_type(c, b, g->type)); - cfree_cg_store(cg, wasm_cg_mem(c, b, g->type)); + cfree_cg_store(cg, wasm_cg_mem(c, b, g->type), ea); } if (m->has_start) wasm_cg_call_func(c, cg, b, &m->funcs[m->start_func], &rt, @@ -1080,7 +1160,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_local(cg, wasm_cg_type(c, b, f->locals[j]), attrs); cfree_cg_push_local(cg, locals[f->nparams + j]); wasm_cg_push_zero(c, cg, b, f->locals[j]); - cfree_cg_store(cg, wasm_cg_mem(c, b, f->locals[j])); + cfree_cg_store(cg, wasm_cg_mem(c, b, f->locals[j]), wasm_cg_ea0()); } for (j = 0; j < f->ninsns; ++j) { WasmInsn in = f->insns[j]; @@ -1207,30 +1287,30 @@ void wasm_emit_cg(CfreeCompiler* c, const 
CfreeCodeOptions* code_opts, cfree_cg_push_local(cg, cond); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0()); cfree_cg_push_local(cg, rhs); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, lhs); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, cond); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0()); cfree_cg_branch_false(cg, else_label); cfree_cg_push_local(cg, result); cfree_cg_push_local(cg, lhs); - cfree_cg_load(cg, mem); - cfree_cg_store(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_jump(cg, end_label); cfree_cg_label_place(cg, else_label); cfree_cg_push_local(cg, result); cfree_cg_push_local(cg, rhs); - cfree_cg_load(cg, mem); - cfree_cg_store(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_label_place(cg, end_label); cfree_cg_push_local(cg, result); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); } break; case WASM_INSN_I32_CONST: cfree_cg_push_int(cg, (uint64_t)(uint32_t)in.imm, @@ -1248,14 +1328,14 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, case WASM_INSN_LOCAL_GET: { uint32_t index = (uint32_t)in.imm; cfree_cg_push_local(cg, locals[index]); - cfree_cg_load(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index))); + cfree_cg_load(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)), wasm_cg_ea0()); break; } case WASM_INSN_LOCAL_SET: { uint32_t index = (uint32_t)in.imm; cfree_cg_push_local(cg, locals[index]); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index))); + cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)), wasm_cg_ea0()); break; } case 
WASM_INSN_LOCAL_TEE: { @@ -1263,7 +1343,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_dup(cg); cfree_cg_push_local(cg, locals[index]); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index))); + cfree_cg_store(cg, wasm_cg_mem(c, b, wasm_func_local_type(f, index)), wasm_cg_ea0()); break; } case WASM_INSN_CALL: @@ -1304,22 +1384,27 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_local(cg, wasm_cg_type(c, b, t->results[0]), attrs); cfree_cg_push_local(cg, selector); cfree_cg_swap(cg); - cfree_cg_store(cg, i32_mem); + cfree_cg_store(cg, i32_mem, wasm_cg_ea0()); for (uint32_t p = 0; p < t->nparams; ++p) { uint32_t param = t->nparams - 1u - p; args[param] = cfree_cg_local(cg, wasm_cg_type(c, b, t->params[param]), attrs); cfree_cg_push_local(cg, args[param]); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param])); + cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param]), wasm_cg_ea0()); } ok = cfree_cg_label_new(cg); cfree_cg_push_local(cg, selector); - cfree_cg_load(cg, i32_mem); - wasm_cg_push_table_lvalue(cg, &rt, instance_local, in.align); - cfree_cg_field(cg, rt.table_len_field); - cfree_cg_load(cg, i32_mem); + cfree_cg_load(cg, i32_mem, wasm_cg_ea0()); + { + CfreeCgEffAddr ea_len; + ea_len.scale = 0; + ea_len.offset = + (int64_t)(rt.table_offset[in.align] + rt.table_len_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + cfree_cg_load(cg, i32_mem, ea_len); + } cfree_cg_int_cmp(cg, CFREE_CG_INT_LT_U); cfree_cg_branch_true(cg, ok); wasm_cg_trap_table(cg, &rt); @@ -1327,13 +1412,17 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, ok = cfree_cg_label_new(cg); cfree_cg_push_local(cg, callee); - wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, in.align, - selector, i32_mem); - cfree_cg_field(cg, rt.table_entry_fn_field); - cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty)); - cfree_cg_store(cg, 
wasm_cg_mem_type(rt.void_ptr_ty)); + wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, in.align, + selector, i32_mem); + { + CfreeCgEffAddr ea_fn; + ea_fn.scale = 0; + ea_fn.offset = (int64_t)rt.table_entry_fn_offset; + cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty), ea_fn); + } + cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty), wasm_cg_ea0()); cfree_cg_push_local(cg, callee); - cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty), wasm_cg_ea0()); cfree_cg_push_null(cg, rt.void_ptr_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); cfree_cg_branch_true(cg, ok); @@ -1341,10 +1430,14 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_label_place(cg, ok); ok = cfree_cg_label_new(cg); - wasm_cg_push_table_entry_lvalue(cg, &rt, instance_local, in.align, - selector, i32_mem); - cfree_cg_field(cg, rt.table_entry_typeidx_field); - cfree_cg_load(cg, i32_mem); + wasm_cg_push_table_entry_ptr(cg, b, &rt, instance_local, in.align, + selector, i32_mem); + { + CfreeCgEffAddr ea_ti; + ea_ti.scale = 0; + ea_ti.offset = (int64_t)rt.table_entry_typeidx_offset; + cfree_cg_load(cg, i32_mem, ea_ti); + } cfree_cg_push_int(cg, (uint32_t)in.imm, b.id[CFREE_CG_BUILTIN_I32]); cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); cfree_cg_branch_true(cg, ok); @@ -1352,13 +1445,13 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_label_place(cg, ok); cfree_cg_push_local(cg, callee); - cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt.void_ptr_ty), wasm_cg_ea0()); cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, indirect_func_type, 0)); cfree_cg_push_local(cg, instance_local); - cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty), wasm_cg_ea0()); for (uint32_t p = 0; p < t->nparams; ++p) { cfree_cg_push_local(cg, args[p]); - cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p])); + 
cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p]), wasm_cg_ea0()); } cfree_cg_call(cg, t->nparams + 1u, indirect_func_type, (CfreeCgCallAttrs){ @@ -1371,9 +1464,9 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, } else if (t->nresults) { cfree_cg_push_local(cg, result); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0])); + cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0()); cfree_cg_push_local(cg, result); - cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0])); + cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0()); } break; } @@ -1381,10 +1474,10 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_push_null(cg, rt.void_ptr_ty); break; case WASM_INSN_REF_FUNC: - wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, - (uint32_t)in.imm); - cfree_cg_addr(cg); - cfree_cg_bitcast(cg, rt.void_ptr_ty); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + wasm_cg_ptr_add_offset(cg, b, + rt.func_ref_entry_offset[(uint32_t)in.imm], + rt.void_ptr_ty); break; case WASM_INSN_REF_IS_NULL: cfree_cg_push_null(cg, rt.void_ptr_ty); @@ -1410,18 +1503,18 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, callee = cfree_cg_local(cg, rt.void_ptr_ty, attrs); cfree_cg_push_local(cg, callee_ref); cfree_cg_swap(cg); - cfree_cg_store(cg, ref_mem); + cfree_cg_store(cg, ref_mem, wasm_cg_ea0()); for (uint32_t p = 0; p < t->nparams; ++p) { uint32_t param = t->nparams - 1u - p; args[param] = cfree_cg_local(cg, wasm_cg_type(c, b, t->params[param]), attrs); cfree_cg_push_local(cg, args[param]); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param])); + cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param]), wasm_cg_ea0()); } ok = cfree_cg_label_new(cg); cfree_cg_push_local(cg, callee_ref); - cfree_cg_load(cg, ref_mem); + cfree_cg_load(cg, ref_mem, wasm_cg_ea0()); cfree_cg_push_null(cg, rt.void_ptr_ty); cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); 
cfree_cg_branch_true(cg, ok); @@ -1430,11 +1523,14 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, ok = cfree_cg_label_new(cg); cfree_cg_push_local(cg, callee_ref); - cfree_cg_load(cg, ref_mem); + cfree_cg_load(cg, ref_mem, wasm_cg_ea0()); cfree_cg_bitcast(cg, rt.table_entry_ptr_ty); - cfree_cg_indirect(cg); - cfree_cg_field(cg, rt.table_entry_typeidx_field); - cfree_cg_load(cg, i32_mem); + { + CfreeCgEffAddr ea_ti; + ea_ti.scale = 0; + ea_ti.offset = (int64_t)rt.table_entry_typeidx_offset; + cfree_cg_load(cg, i32_mem, ea_ti); + } cfree_cg_push_int(cg, (uint32_t)in.imm, b.id[CFREE_CG_BUILTIN_I32]); cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); cfree_cg_branch_true(cg, ok); @@ -1443,12 +1539,15 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, cfree_cg_push_local(cg, callee); cfree_cg_push_local(cg, callee_ref); - cfree_cg_load(cg, ref_mem); + cfree_cg_load(cg, ref_mem, wasm_cg_ea0()); cfree_cg_bitcast(cg, rt.table_entry_ptr_ty); - cfree_cg_indirect(cg); - cfree_cg_field(cg, rt.table_entry_fn_field); - cfree_cg_load(cg, ref_mem); - cfree_cg_store(cg, ref_mem); + { + CfreeCgEffAddr ea_fn; + ea_fn.scale = 0; + ea_fn.offset = (int64_t)rt.table_entry_fn_offset; + cfree_cg_load(cg, ref_mem, ea_fn); + } + cfree_cg_store(cg, ref_mem, wasm_cg_ea0()); memset(ref_params, 0, sizeof ref_params); ref_params[0].type = rt.instance_ptr_ty; @@ -1465,13 +1564,13 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, result = cfree_cg_local(cg, wasm_cg_type(c, b, t->results[0]), attrs); cfree_cg_push_local(cg, callee); - cfree_cg_load(cg, ref_mem); + cfree_cg_load(cg, ref_mem, wasm_cg_ea0()); cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, ref_func_type, 0)); cfree_cg_push_local(cg, instance_local); - cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty)); + cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty), wasm_cg_ea0()); for (uint32_t p = 0; p < t->nparams; ++p) { cfree_cg_push_local(cg, args[p]); - cfree_cg_load(cg, 
wasm_cg_mem(c, b, t->params[p])); + cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p]), wasm_cg_ea0()); } cfree_cg_call( cg, t->nparams + 1u, ref_func_type, @@ -1484,25 +1583,27 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, } else if (t->nresults) { cfree_cg_push_local(cg, result); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0])); + cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0()); cfree_cg_push_local(cg, result); - cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0])); + cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0]), wasm_cg_ea0()); } break; } case WASM_INSN_GLOBAL_GET: { uint32_t index = (uint32_t)in.imm; - wasm_cg_push_global_value_lvalue(c, cg, b, &rt, instance_local, m, - index); - cfree_cg_load(cg, wasm_cg_mem(c, b, m->globals[index].type)); + wasm_cg_push_global_value_ptr(c, cg, b, &rt, instance_local, m, + index); + cfree_cg_load(cg, wasm_cg_mem(c, b, m->globals[index].type), + wasm_cg_ea0()); break; } case WASM_INSN_GLOBAL_SET: { uint32_t index = (uint32_t)in.imm; - wasm_cg_push_global_value_lvalue(c, cg, b, &rt, instance_local, m, - index); + wasm_cg_push_global_value_ptr(c, cg, b, &rt, instance_local, m, + index); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, m->globals[index].type)); + cfree_cg_store(cg, wasm_cg_mem(c, b, m->globals[index].type), + wasm_cg_ea0()); break; } case WASM_INSN_RETURN: @@ -1514,12 +1615,17 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, case WASM_INSN_DROP: cfree_cg_drop(cg); break; - case WASM_INSN_MEMORY_SIZE: - wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, in.memidx); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + case WASM_INSN_MEMORY_SIZE: { + CfreeCgEffAddr ea_pages; + ea_pages.scale = 0; + ea_pages.offset = + (int64_t)(rt.memory_offset[in.memidx] + rt.memory_pages_offset); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_pages); 
if (!m->memories[in.memidx].is64) cfree_cg_trunc(cg, b.id[CFREE_CG_BUILTIN_I32]); break; + } case WASM_INSN_MEMORY_GROW: { CfreeCgLocalAttrs attrs; CfreeCgLocal delta, old_pages, grow_result; @@ -1528,6 +1634,13 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, WasmValType page_vt = m->memories[in.memidx].is64 ? WASM_VAL_I64 : WASM_VAL_I32; CfreeCgTypeId page_ty = wasm_cg_type(c, b, page_vt); + CfreeCgEffAddr ea_pages, ea_max; + ea_pages.scale = 0; + ea_pages.offset = + (int64_t)(rt.memory_offset[in.memidx] + rt.memory_pages_offset); + ea_max.scale = 0; + ea_max.offset = (int64_t)(rt.memory_offset[in.memidx] + + rt.memory_max_pages_offset); memset(&attrs, 0, sizeof attrs); attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; delta = cfree_cg_local(cg, page_ty, attrs); @@ -1535,47 +1648,47 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, grow_result = cfree_cg_local(cg, page_ty, attrs); cfree_cg_push_local(cg, delta); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt)); + cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0()); cfree_cg_push_local(cg, old_pages); - wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, in.memidx); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_pages); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0()); cfree_cg_push_local(cg, delta); - cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt)); + cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0()); if (!m->memories[in.memidx].is64) cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I64]); - wasm_cg_push_memory_max_lvalue(cg, &rt, instance_local, in.memidx); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_max); cfree_cg_push_local(cg, 
old_pages); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0()); cfree_cg_int_binop(cg, CFREE_CG_INT_SUB, 0); cfree_cg_int_cmp(cg, CFREE_CG_INT_LE_U); cfree_cg_branch_false(cg, fail); - wasm_cg_push_memory_pages_lvalue(cg, &rt, instance_local, in.memidx); + wasm_cg_push_instance_ptr(cg, &rt, instance_local); cfree_cg_push_local(cg, old_pages); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0()); cfree_cg_push_local(cg, delta); - cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt)); + cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0()); if (!m->memories[in.memidx].is64) cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I64]); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), ea_pages); cfree_cg_push_local(cg, grow_result); cfree_cg_push_local(cg, old_pages); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0()); if (!m->memories[in.memidx].is64) cfree_cg_trunc(cg, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt)); + cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0()); cfree_cg_jump(cg, done); cfree_cg_label_place(cg, fail); cfree_cg_push_local(cg, grow_result); cfree_cg_push_int(cg, UINT64_MAX, page_ty); - cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt)); + cfree_cg_store(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0()); cfree_cg_label_place(cg, done); cfree_cg_push_local(cg, grow_result); - cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt)); + cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt), wasm_cg_ea0()); break; } case WASM_INSN_ATOMIC_FENCE: @@ -1593,9 +1706,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, CfreeCgMemAccess mem = wasm_cg_mem_type(ty); mem.align = in.align; wasm_cg_memory_check(c, cg, b, m, &rt, 
instance_local, &in); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); - cfree_cg_addr(cg); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); cfree_cg_atomic_load(cg, mem, CFREE_CG_MO_SEQ_CST); break; } @@ -1617,13 +1728,11 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, value_tmp = cfree_cg_local(cg, ty, attrs); cfree_cg_push_local(cg, value_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); - cfree_cg_addr(cg); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); cfree_cg_push_local(cg, value_tmp); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_atomic_store(cg, mem, CFREE_CG_MO_SEQ_CST); break; } @@ -1650,13 +1759,11 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, value_tmp = cfree_cg_local(cg, ty, attrs); cfree_cg_push_local(cg, value_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); - cfree_cg_addr(cg); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); cfree_cg_push_local(cg, value_tmp); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_atomic_rmw(cg, mem, wasm_atomic_rmw_op(in.kind), CFREE_CG_MO_SEQ_CST); break; @@ -1675,18 +1782,16 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, expected_tmp = cfree_cg_local(cg, ty, attrs); cfree_cg_push_local(cg, desired_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, expected_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + 
cfree_cg_store(cg, mem, wasm_cg_ea0()); wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); - cfree_cg_addr(cg); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); cfree_cg_push_local(cg, expected_tmp); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_push_local(cg, desired_tmp); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_atomic_cmpxchg(cg, mem, CFREE_CG_MO_SEQ_CST, CFREE_CG_MO_SEQ_CST, 0); cfree_cg_drop(cg); @@ -1709,31 +1814,29 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, result_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], attrs); cfree_cg_push_local(cg, timeout_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64), wasm_cg_ea0()); cfree_cg_push_local(cg, expected_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); - cfree_cg_addr(cg); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); cfree_cg_atomic_load(cg, mem, CFREE_CG_MO_SEQ_CST); cfree_cg_push_local(cg, expected_tmp); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); cfree_cg_branch_true(cg, equal); cfree_cg_push_local(cg, result_tmp); cfree_cg_push_int(cg, 1, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0()); cfree_cg_jump(cg, done); cfree_cg_label_place(cg, equal); (void)timeout_tmp; cfree_cg_push_local(cg, result_tmp); cfree_cg_push_int(cg, 2, b.id[CFREE_CG_BUILTIN_I32]); - cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + 
cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0()); cfree_cg_label_place(cg, done); cfree_cg_push_local(cg, result_tmp); - cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32), wasm_cg_ea0()); break; } case WASM_INSN_MEMORY_ATOMIC_NOTIFY: { @@ -1745,7 +1848,7 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, count_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], attrs); cfree_cg_push_local(cg, count_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, i32_mem); + cfree_cg_store(cg, i32_mem, wasm_cg_ea0()); wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); cfree_cg_drop(cg); cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I32]); @@ -1771,9 +1874,8 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, mem.type = storage; mem.align = in.align; wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); - cfree_cg_load(cg, mem); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); + cfree_cg_load(cg, mem, wasm_cg_ea0()); if (storage != result) { if (in.kind == WASM_INSN_I32_LOAD8_S || in.kind == WASM_INSN_I32_LOAD16_S || @@ -1811,18 +1913,17 @@ void wasm_emit_cg(CfreeCompiler* c, const CfreeCodeOptions* code_opts, addr_tmp = cfree_cg_local(cg, wasm_cg_type(c, b, addr_vt), attrs); cfree_cg_push_local(cg, value_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, wasm_cg_ea0()); wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); cfree_cg_push_local(cg, addr_tmp); cfree_cg_swap(cg); - cfree_cg_store(cg, wasm_cg_mem(c, b, addr_vt)); + cfree_cg_store(cg, wasm_cg_mem(c, b, addr_vt), wasm_cg_ea0()); cfree_cg_push_local(cg, addr_tmp); - cfree_cg_load(cg, wasm_cg_mem(c, b, addr_vt)); - wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, - in.offset64); + cfree_cg_load(cg, wasm_cg_mem(c, b, addr_vt), 
wasm_cg_ea0()); + wasm_cg_memory_addr_from_tos(cg, b, &rt, m, instance_local, in.memidx, in.offset64); cfree_cg_push_local(cg, value_tmp); - cfree_cg_load(cg, mem); - cfree_cg_store(cg, mem); + cfree_cg_load(cg, mem, wasm_cg_ea0()); + cfree_cg_store(cg, mem, wasm_cg_ea0()); break; } case WASM_INSN_I32_ADD: diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c @@ -1197,6 +1197,10 @@ static void render_operand(AA64Asm* a, StrBuf* sb, u32 idx, int form) { case 3: /* %aN — memory addressing form */ if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); + /* Inline asm consumes a plain pointer-shaped address; the cg + * contract guarantees no EA index here. */ + if (op->v.ind.index != REG_NONE) + inline_panic(a, "%a operand has unexpected EA index"); render_indirect(sb, op->v.ind.base, op->v.ind.ofs); return; default: @@ -1211,6 +1215,8 @@ static void render_operand(AA64Asm* a, StrBuf* sb, u32 idx, int form) { render_imm(sb, op->v.imm); return; case OPK_INDIRECT: + if (op->v.ind.index != REG_NONE) + inline_panic(a, "inline-asm operand has unexpected EA index"); render_indirect(sb, op->v.ind.base, op->v.ind.ofs); return; default: diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -156,40 +156,145 @@ static int use_got_for_sym(CGTarget* t, ObjSymId sym) { return obj_symbol_extern_via_got(t->c, t->obj, sym); } -static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { +/* Effective-address descriptor produced by addr_mode. Mirrors the + * Operand.v.ind shape after any required fixups (offset folded into a + * scratch register when out of range, GLOBAL materialized into a register). + * `index == REG_NONE` means plain base+offset; otherwise the indexed + * register-offset form should be used and ofs is always 0. 
*/ +typedef struct AAAddrMode { + u32 base; /* physical register holding the base */ + u32 index; /* physical register holding the index, or REG_NONE */ + u32 log2_scale; /* 0..3 — only valid when index != REG_NONE */ + i32 ofs; /* signed displacement; 0 when index != REG_NONE */ +} AAAddrMode; + +/* Resolve an Operand addressing form to an AAAddrMode usable by the + * load/store emitters. Handles all base kinds (LOCAL, INDIRECT, GLOBAL) + * and folds out-of-range offsets through `tmp_reg` via + * aa64_emit_addr_adjust, matching the prior addr_base contract. + * + * When the input INDIRECT carries an index, this routine preserves it in + * the result. If a nonzero displacement is also present, it is added to + * the base via the temp register so the indexed register-offset + * instruction (which encodes no displacement) can use {tmp, index, 0}. */ +static AAAddrMode addr_mode(CGTarget* t, Operand addr, u32 tmp_reg) { AAImpl* a = impl_of(t); + AAAddrMode m; + m.base = 0u; + m.index = REG_NONE; + m.log2_scale = 0u; + m.ofs = 0; + if (addr.kind == OPK_LOCAL) { AASlot* s = aa64_slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_base: bad slot"); + if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_mode: bad slot"); i32 off = -(i32)s->off; if (off >= -256 && off <= 255) { - *out_off = off; - return 29; + m.base = 29u; + m.ofs = off; + } else { + aa64_emit_addr_adjust(t->mc, tmp_reg, 29u, off); + m.base = tmp_reg; + m.ofs = 0; } - aa64_emit_addr_adjust(t->mc, tmp_reg, 29, off); - *out_off = 0; - return tmp_reg; + return m; } if (addr.kind == OPK_INDIRECT) { i32 off = addr.v.ind.ofs; - u32 base = addr.v.ind.base & 0x1f; - if (off >= -256 && off <= 255) { - *out_off = off; - return base; + u32 base = addr.v.ind.base & 0x1fu; + Reg idx = addr.v.ind.index; + if (idx == REG_NONE) { + if (off >= -256 && off <= 255) { + m.base = base; + m.ofs = off; + } else { + aa64_emit_addr_adjust(t->mc, tmp_reg, base, off); + m.base = tmp_reg; + m.ofs = 0; 
+ } + return m; + } + /* Indexed: fold any displacement into the base so the indexed + * register-offset instruction can encode just {base, index, scale}. */ + if (off != 0) { + aa64_emit_addr_adjust(t->mc, tmp_reg, base, off); + m.base = tmp_reg; + } else { + m.base = base; } - aa64_emit_addr_adjust(t->mc, tmp_reg, base, off); - *out_off = 0; - return tmp_reg; + m.index = (u32)idx & 0x1fu; + m.log2_scale = addr.v.ind.log2_scale & 0x3u; + m.ofs = 0; + return m; } if (addr.kind == OPK_GLOBAL) { emit_global_addr(t, tmp_reg, addr.v.global.sym, addr.v.global.addend); - *out_off = 0; - return tmp_reg; + m.base = tmp_reg; + m.ofs = 0; + return m; } - compiler_panic(t->c, a->loc, "aarch64 addr_base: unsupported kind %d", + compiler_panic(t->c, a->loc, "aarch64 addr_mode: unsupported kind %d", (int)addr.kind); } +/* Assert that an Operand consumed by a non-load/store path carries no + * EA index. Per doc/INDIRECT.md the cg layer never routes an indexed + * OPK_INDIRECT to spill/reload, bitfield, atomics, copy_bytes/set_bytes, + * inline asm, or addr_of; the assert catches upstream misrouting before + * it silently produces incorrect addressing. */ +static inline void aa_assert_no_index(CGTarget* t, Operand addr, + const char* where) { + if (addr.kind == OPK_INDIRECT && addr.v.ind.index != REG_NONE) { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 %s: OPK_INDIRECT with index unexpected", where); + } +} + +/* LDR (register), 32-bit option=LSL. Encodes + * LDR<size> Wt|Xt, [Xn, Xm{, LSL #amt}] (integer) + * where size in {0..3} selects byte/half/word/double; opc=01 (load). + * S=0 -> no shift (amt=0); S=1 -> shift by `size` (amt=size). + * The aarch64 register-offset addressing mode supports only those two + * shift amounts (other values must be lowered upstream). 
*/ +static inline u32 aa64_ldr_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) { + return 0x38606800u | (size << 30) | ((Rm & 0x1fu) << 16) | + ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu); +} +static inline u32 aa64_str_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) { + return 0x38206800u | (size << 30) | ((Rm & 0x1fu) << 16) | + ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu); +} +static inline u32 aa64_ldr_fp_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) { + return 0x3C606800u | (size << 30) | ((Rm & 0x1fu) << 16) | + ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu); +} +static inline u32 aa64_str_fp_reg(u32 size, u32 Rt, u32 Rn, u32 Rm, u32 S) { + return 0x3C206800u | (size << 30) | ((Rm & 0x1fu) << 16) | + ((S & 1u) << 12) | ((Rn & 0x1fu) << 5) | (Rt & 0x1fu); +} +/* 128-bit Q register-offset variants (size encoded as size=00, opc bit + * pattern 11 selects 128b). */ +static inline u32 aa64_ldr_q_reg(u32 Rt, u32 Rn, u32 Rm, u32 S) { + return 0x3CE06800u | ((Rm & 0x1fu) << 16) | ((S & 1u) << 12) | + ((Rn & 0x1fu) << 5) | (Rt & 0x1fu); +} +static inline u32 aa64_str_q_reg(u32 Rt, u32 Rn, u32 Rm, u32 S) { + return 0x3CA06800u | ((Rm & 0x1fu) << 16) | ((S & 1u) << 12) | + ((Rn & 0x1fu) << 5) | (Rt & 0x1fu); +} + +/* True if `log2_scale` is legal for the aarch64 register-offset form at + * a given access size index (sidx). The encoding supports S=0 (LSL #0) + * and S=1 (LSL #sidx) — any other scale must be lowered by adding + * `index << log2_scale` into the base via arch_lower_indexed before the + * load/store. */ +static inline int aa_indexed_scale_legal(u32 sidx, u32 log2_scale, + u32* S_out) { + if (log2_scale == 0u) { *S_out = 0u; return 1; } + if (log2_scale == sidx) { *S_out = 1u; return 1; } + return 0; +} + void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { u32 sz = ma.size ? 
ma.size : type_byte_size(addr.type); u32 sidx = size_idx_for_bytes(sz); @@ -222,12 +327,35 @@ void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { return; } - i32 off; - u32 base = addr_base(t, addr, &off, AA_TMP0); + /* Indexed register-offset form: emit `LDR Rt, [Rn, Rm{, LSL #s}]` when + * the EA carries an index and the scale matches the encoding (S=0 → + * LSL #0, S=1 → LSL #sidx). Otherwise fall back to + * arch_lower_indexed, which materializes base+(index<<scale) into a + * scratch and gives us a plain base+disp shape. */ + if (addr.kind == OPK_INDIRECT && addr.v.ind.index != REG_NONE) { + u32 S; + if (aa_indexed_scale_legal(sidx, addr.v.ind.log2_scale & 0x3u, &S)) { + AAAddrMode m = addr_mode(t, addr, AA_TMP0); + if (dst.cls == RC_FP) { + if (sidx == 4u) + aa64_emit32(t->mc, aa64_ldr_q_reg(reg_num(dst), m.base, m.index, S)); + else + aa64_emit32(t->mc, + aa64_ldr_fp_reg(sidx, reg_num(dst), m.base, m.index, S)); + } else { + aa64_emit32(t->mc, + aa64_ldr_reg(sidx, reg_num(dst), m.base, m.index, S)); + } + return; + } + addr = arch_lower_indexed(t, addr, AA_TMP0); + } + + AAAddrMode m = addr_mode(t, addr, AA_TMP0); if (dst.cls == RC_FP) { - aa_emit_ldr_fp_any(t->mc, sidx, reg_num(dst), base, off); + aa_emit_ldr_fp_any(t->mc, sidx, reg_num(dst), m.base, m.ofs); } else { - aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off)); + aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), m.base, m.ofs)); } } @@ -243,7 +371,13 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { u32 src_reg; u32 src_is_fp = 0; - if (src.kind == OPK_IMM) { + /* Zero immediate stores use wzr/xzr directly (reg 31). Avoids a + * separate `mov wN, #0` and frees AA_TMP0 for the address base. */ + int src_imm_zero = (src.kind == OPK_IMM && src.v.imm == 0 && + src.cls != RC_FP); + if (src_imm_zero) { + src_reg = 31u; + } else if (src.kind == OPK_IMM) { u32 sf = (sz == 8) ? 
1u : 0u; aa64_emit_load_imm(mc, sf, AA_TMP0, src.v.imm); src_reg = AA_TMP0; @@ -253,7 +387,7 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { } else { src_reg = reg_num(src); } - u32 base = (src.kind == OPK_IMM) ? AA_TMP1 : AA_TMP0; + u32 base = (src.kind == OPK_IMM && !src_imm_zero) ? AA_TMP1 : AA_TMP0; if (use_got_for_sym(t, sym)) { aa64_emit_got_load_addr(t, base, sym); if (src_is_fp) { @@ -277,20 +411,60 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { return; } - i32 off; - u32 addr_tmp = (src.kind == OPK_IMM) ? AA_TMP1 : AA_TMP0; - u32 base = addr_base(t, addr, &off, addr_tmp); + /* Zero immediate stores use wzr/xzr directly (reg 31). */ + int src_imm_zero = (src.kind == OPK_IMM && src.v.imm == 0 && + src.cls != RC_FP); + u32 addr_tmp = + (src.kind == OPK_IMM && !src_imm_zero) ? AA_TMP1 : AA_TMP0; + + /* Indexed register-offset form for STR when the EA's scale is legal. + * Falls back to arch_lower_indexed when LSL doesn't fit the + * instruction encoding (e.g. byte access with log2_scale=3). */ + if (addr.kind == OPK_INDIRECT && addr.v.ind.index != REG_NONE) { + u32 S; + if (aa_indexed_scale_legal(sidx, addr.v.ind.log2_scale & 0x3u, &S)) { + AAAddrMode m = addr_mode(t, addr, addr_tmp); + u32 src_reg; + if (src_imm_zero) { + src_reg = 31u; + } else if (src.kind == OPK_IMM) { + u32 sf = (sz == 8) ? 
1u : 0u; + aa64_emit_load_imm(t->mc, sf, AA_TMP0, src.v.imm); + src_reg = AA_TMP0; + } else { + src_reg = reg_num(src); + } + if (src.cls == RC_FP && !src_imm_zero) { + if (sidx == 4u) + aa64_emit32(t->mc, aa64_str_q_reg(src_reg, m.base, m.index, S)); + else + aa64_emit32(t->mc, + aa64_str_fp_reg(sidx, src_reg, m.base, m.index, S)); + } else { + aa64_emit32(t->mc, + aa64_str_reg(sidx, src_reg, m.base, m.index, S)); + } + return; + } + addr = arch_lower_indexed(t, addr, addr_tmp); + } + + AAAddrMode m = addr_mode(t, addr, addr_tmp); + if (src_imm_zero) { + aa64_emit32(t->mc, aa64_stur(sidx, 31u, m.base, m.ofs)); + return; + } if (src.kind == OPK_IMM) { u32 sf = (sz == 8) ? 1u : 0u; aa64_emit_load_imm(t->mc, sf, AA_TMP0, src.v.imm); - aa64_emit32(t->mc, aa64_stur(sidx, AA_TMP0, base, off)); + aa64_emit32(t->mc, aa64_stur(sidx, AA_TMP0, m.base, m.ofs)); return; } if (src.cls == RC_FP) { - aa_emit_str_fp_any(t->mc, sidx, reg_num(src), base, off); + aa_emit_str_fp_any(t->mc, sidx, reg_num(src), m.base, m.ofs); } else { - aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off)); + aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), m.base, m.ofs)); } } @@ -303,6 +477,7 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) { return; } if (lv.kind == OPK_INDIRECT) { + aa_assert_no_index(t, lv, "addr_of"); i32 ofs = lv.v.ind.ofs; u32 base = lv.v.ind.base & 0x1f; aa64_emit_addr_adjust(t->mc, reg_num(dst), base, ofs); @@ -454,6 +629,20 @@ static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { aa64_emit_addr_adjust(t->mc, scratch, 29, -(i32)s->off); return scratch; } + if (op.kind == OPK_GLOBAL) { + emit_global_addr(t, scratch, op.v.global.sym, op.v.global.addend); + return scratch; + } + if (op.kind == OPK_INDIRECT) { + /* Aggregate helpers (copy_bytes/set_bytes, bitfield_*) take plain + * pointer addresses; the cg contract guarantees no EA index here. 
*/ + aa_assert_no_index(t, op, "agg address"); + u32 base = op.v.ind.base & 0x1fu; + i32 ofs = op.v.ind.ofs; + if (ofs == 0) return base; + aa64_emit_addr_adjust(t->mc, scratch, base, ofs); + return scratch; + } compiler_panic(t->c, impl_of(t)->loc, "aarch64 agg: address kind %d unsupported", (int)op.kind); } @@ -917,6 +1106,7 @@ static Operand aa_call_stack_arg_addr(CGTarget* t, u32 stack_offset, addr.kind = OPK_INDIRECT; addr.cls = RC_INT; addr.v.ind.base = tail && !a->omit_frame ? 29u : 31u; + addr.v.ind.index = REG_NONE; addr.v.ind.ofs = (i32)stack_offset; if (tail && !a->omit_frame) addr.v.ind.ofs += 16; return addr; @@ -1064,14 +1254,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, break; } case OPK_INDIRECT: { + aa_assert_no_index(t, av->storage, "call INT arg storage"); Operand src; memset(&src, 0, sizeof src); src.kind = OPK_INDIRECT; src.v.ind.base = av->storage.v.ind.base; + src.v.ind.index = REG_NONE; src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; - i32 off; - u32 base = addr_base(t, src, &off, AA_TMP0); - aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off)); + AAAddrMode m = addr_mode(t, src, AA_TMP0); + aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, m.base, m.ofs)); break; } default: @@ -1106,14 +1297,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, break; } case OPK_INDIRECT: { + aa_assert_no_index(t, av->storage, "call FP arg storage"); Operand src; memset(&src, 0, sizeof src); src.kind = OPK_INDIRECT; src.v.ind.base = av->storage.v.ind.base; + src.v.ind.index = REG_NONE; src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; - i32 off; - u32 base = addr_base(t, src, &off, AA_TMP0); - aa_emit_ldr_fp_any(t->mc, sidx, dst_reg, base, off); + AAAddrMode m = addr_mode(t, src, AA_TMP0); + aa_emit_ldr_fp_any(t->mc, sidx, dst_reg, m.base, m.ofs); break; } default: @@ -1137,14 +1329,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, break; } case OPK_INDIRECT: { + 
aa_assert_no_index(t, av->storage, "call FP stack-arg storage"); Operand src; memset(&src, 0, sizeof src); src.kind = OPK_INDIRECT; src.v.ind.base = av->storage.v.ind.base; + src.v.ind.index = REG_NONE; src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; - i32 off; - u32 base = addr_base(t, src, &off, AA_TMP0); - aa_emit_ldr_fp_any(t->mc, sidx, AA_FP_TMP0, base, off); + AAAddrMode m = addr_mode(t, src, AA_TMP0); + aa_emit_ldr_fp_any(t->mc, sidx, AA_FP_TMP0, m.base, m.ofs); aa_store_stack_reg(t, AA_FP_TMP0, RC_FP, av->type, sz, *stack_off, tail); break; @@ -1439,6 +1632,7 @@ static Operand aa_call_plan_offset_operand(CGTarget* t, Operand op, u32 offset) { if (!offset) return op; if (op.kind == OPK_INDIRECT) { + aa_assert_no_index(t, op, "call plan offset operand"); op.v.ind.ofs += (i32)offset; } else if (op.kind == OPK_LOCAL) { AAImpl* a = impl_of(t); @@ -1446,6 +1640,8 @@ static Operand aa_call_plan_offset_operand(CGTarget* t, Operand op, if (!s) compiler_panic(t->c, a->loc, "aarch64 call plan: bad slot"); op.kind = OPK_INDIRECT; op.v.ind.base = 29; + op.v.ind.index = REG_NONE; + op.v.ind.log2_scale = 0; op.v.ind.ofs = -(i32)s->off + (i32)offset; } return op; @@ -1931,6 +2127,7 @@ static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, MCEmitter* mc = t->mc; u32 sf = (ma.size == 8) ? 
1u : 0u; + aa_assert_no_index(t, addr, "atomic_load"); u32 base; if (addr.kind == OPK_REG) { base = reg_num(addr); @@ -1939,6 +2136,14 @@ static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot"); base = AA_TMP0; aa64_emit_addr_adjust(mc, base, 29, -(i32)s->off); + } else if (addr.kind == OPK_INDIRECT) { + AAAddrMode m = addr_mode(t, addr, AA_TMP0); + if (m.ofs != 0) { + aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs); + base = AA_TMP0; + } else { + base = m.base; + } } else { compiler_panic(t->c, a->loc, "aarch64 atomic_load: addr kind %d unsupported", @@ -1969,6 +2174,7 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src, "aarch64 atomic_store: src kind %d unsupported", (int)src.kind); } + aa_assert_no_index(t, addr, "atomic_store"); u32 base; if (addr.kind == OPK_REG) { base = reg_num(addr); @@ -1977,6 +2183,14 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src, if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot"); base = AA_TMP0; aa64_emit_addr_adjust(mc, base, 29, -(i32)s->off); + } else if (addr.kind == OPK_INDIRECT) { + AAAddrMode m = addr_mode(t, addr, AA_TMP0); + if (m.ofs != 0) { + aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs); + base = AA_TMP0; + } else { + base = m.base; + } } else { compiler_panic(t->c, a->loc, "aarch64 atomic_store: addr kind %d unsupported", @@ -2015,6 +2229,7 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, MCEmitter* mc = t->mc; u32 sf = (ma.size == 8) ? 
1u : 0u; + aa_assert_no_index(t, addr, "atomic_rmw"); u32 base = AA_TMP0; if (addr.kind == OPK_REG) { aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(addr))); @@ -2022,6 +2237,10 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, AASlot* s = aa64_slot_get(a, addr.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot"); aa64_emit_addr_adjust(mc, AA_TMP0, 29, -(i32)s->off); + } else if (addr.kind == OPK_INDIRECT) { + AAAddrMode m = addr_mode(t, addr, AA_TMP0); + if (m.base != AA_TMP0 || m.ofs != 0) + aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs); } else { compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported", (int)addr.kind); @@ -2068,6 +2287,7 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, u32 sf = (ma.size == 8) ? 1u : 0u; (void)fail; + aa_assert_no_index(t, addr, "atomic_cas"); u32 base = AA_TMP0; if (addr.kind == OPK_REG) aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(addr))); @@ -2075,6 +2295,10 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, AASlot* s = aa64_slot_get(a, addr.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot"); aa64_emit_addr_adjust(mc, AA_TMP0, 29, -(i32)s->off); + } else if (addr.kind == OPK_INDIRECT) { + AAAddrMode m = addr_mode(t, addr, AA_TMP0); + if (m.base != AA_TMP0 || m.ofs != 0) + aa64_emit_addr_adjust(mc, AA_TMP0, m.base, m.ofs); } else { compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported", (int)addr.kind); diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -302,6 +302,8 @@ typedef struct Operand { } global; struct { Reg base; + Reg index; /* REG_NONE when no index operand */ + u8 log2_scale; /* 0..3 -> 1/2/4/8 bytes; ignored when index == REG_NONE */ i32 ofs; } ind; } v; @@ -1008,6 +1010,13 @@ CGTarget* cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); void cgtarget_finalize(CGTarget*); void cgtarget_free(CGTarget*); +/* 
Helper for backends without a native indexed addressing mode. If addr has + * an index (addr.v.ind.index != REG_NONE), materializes + * base + (index << log2_scale) into `scratch` and returns a plain + * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller + * supplies the scratch register from its scratch pool. */ +Operand arch_lower_indexed(CGTarget*, Operand addr, Reg scratch); + /* ---- Disassembler hook ---- * Bytes -> records, not frontend-driven lowering, so this is a separate * hook from CGTarget/MCEmitter. The internal implementation may share diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -421,6 +421,60 @@ static void c_emit_imm_literal(CTarget* t, i64 v) { cbuf_put_i64(&t->body, v); } +/* Address-mode tuple decoded from an OPK_INDIRECT operand. Mirrors the + * `addr_mode` helper in the machine-code backends so all targets share a + * single in-backend view of `base [+ index << log2_scale] + ofs`. */ +typedef struct CAddrMode { + Reg base; + Reg index; /* REG_NONE when no index operand */ + u8 log2_scale; /* meaningful only when index != REG_NONE */ + i32 ofs; +} CAddrMode; + +static CAddrMode c_addr_mode(Operand addr) { + CAddrMode m; + m.base = addr.v.ind.base; + m.index = addr.v.ind.index; + m.log2_scale = addr.v.ind.log2_scale; + m.ofs = addr.v.ind.ofs; + return m; +} + +/* Emit `(char*)base [+ (uintptr_t)index * (1u << log2_scale)] [+ ofs]` into + * the body, with each optional term suppressed when absent. Used by every + * OPK_INDIRECT renderer; the caller wraps it with the appropriate + * `(*(T*)(...))` or `((T)(...))` cast. 
*/ +static void c_emit_indirect_addr_expr(CTarget* t, CAddrMode m) { + char rbuf[24]; + cbuf_puts(&t->body, "(char*)"); + c_reg_name(m.base, rbuf, sizeof rbuf); + cbuf_puts(&t->body, rbuf); + if (m.index != REG_NONE) { + cbuf_puts(&t->body, " + (uintptr_t)"); + c_reg_name(m.index, rbuf, sizeof rbuf); + cbuf_puts(&t->body, rbuf); + cbuf_puts(&t->body, " * "); + /* Spell as the explicit 1/2/4/8 literal corresponding to log2_scale. + * log2_scale is normalized to {0,1,2,3} by cg. */ + cbuf_put_u64(&t->body, (u64)(1u << m.log2_scale)); + } + if (m.ofs != 0) { + cbuf_puts(&t->body, " + "); + cbuf_put_i64(&t->body, (i64)m.ofs); + } +} + +/* Assert that `addr`, if OPK_INDIRECT, has no index operand. Used by paths + * the cg layer guarantees never carry the indexed shape (bitfield, atomics, + * copy_bytes/set_bytes, inline asm). */ +static void c_assert_no_index(CTarget* t, Operand addr, const char* where) { + if (addr.kind != OPK_INDIRECT) return; + if (addr.v.ind.index == REG_NONE) return; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + compiler_panic(t->c, loc, + "C target: %s: indexed OPK_INDIRECT not allowed here", where); +} + void c_emit_operand(CTarget* t, Operand op) { char buf[24]; switch (op.kind) { @@ -467,18 +521,12 @@ void c_emit_operand(CTarget* t, Operand op) { } case OPK_INDIRECT: { /* Used by call paths to pass aggregates by-address: the operand's type - * is the aggregate, the storage is (base + offset). Emit the deref as - * a value expression. */ + * is the aggregate, the storage is `base + index*scale + ofs`. Emit the + * deref as a value expression. 
*/ cbuf_puts(&t->body, "(*("); c_emit_type(t, &t->body, op.type); - cbuf_puts(&t->body, "*)((char*)"); - char rbuf[24]; - c_reg_name(op.v.ind.base, rbuf, sizeof rbuf); - cbuf_puts(&t->body, rbuf); - if (op.v.ind.ofs != 0) { - cbuf_puts(&t->body, " + "); - cbuf_put_i64(&t->body, (i64)op.v.ind.ofs); - } + cbuf_puts(&t->body, "*)("); + c_emit_indirect_addr_expr(t, c_addr_mode(op)); cbuf_puts(&t->body, "))"); return; } @@ -777,29 +825,24 @@ static void c_emit_addr_deref(CTarget* t, Operand addr, CfreeCgTypeId access_typ return; } case OPK_INDIRECT: { - Operand base_reg; - base_reg.kind = OPK_REG; - base_reg.cls = RC_INT; - base_reg.type = 0; - base_reg.v.reg = addr.v.ind.base; + CAddrMode m = c_addr_mode(addr); /* Ensure the base reg is declared. We can't readily look up its type * post-hoc, so reuse whatever it was first declared with. */ - if ((u32)addr.v.ind.base >= t->reg_cap || - !t->reg_declared[addr.v.ind.base]) { + if ((u32)m.base >= t->reg_cap || !t->reg_declared[m.base]) { compiler_panic(t->c, loc, "C target: indirect on undeclared base reg v%u", - (unsigned)addr.v.ind.base); + (unsigned)m.base); + } + if (m.index != REG_NONE && + ((u32)m.index >= t->reg_cap || !t->reg_declared[m.index])) { + compiler_panic(t->c, loc, + "C target: indirect on undeclared index reg v%u", + (unsigned)m.index); } cbuf_puts(&t->body, "(*("); c_emit_type(t, &t->body, access_type); - cbuf_puts(&t->body, "*)((char*)"); - char rbuf[24]; - c_reg_name(addr.v.ind.base, rbuf, sizeof rbuf); - cbuf_puts(&t->body, rbuf); - if (addr.v.ind.ofs != 0) { - cbuf_puts(&t->body, " + "); - cbuf_put_i64(&t->body, (i64)addr.v.ind.ofs); - } + cbuf_puts(&t->body, "*)("); + c_emit_indirect_addr_expr(t, m); cbuf_puts(&t->body, "))"); return; } @@ -842,13 +885,8 @@ static void c_emit_lvalue_addr(CTarget* t, Operand lv, CfreeCgTypeId dst_type) { case OPK_INDIRECT: { cbuf_puts(&t->body, "(("); c_emit_type(t, &t->body, dst_type); - cbuf_puts(&t->body, ")((char*)"); - c_reg_name(lv.v.ind.base, buf, sizeof buf); 
- cbuf_puts(&t->body, buf); - if (lv.v.ind.ofs != 0) { - cbuf_puts(&t->body, " + "); - cbuf_put_i64(&t->body, (i64)lv.v.ind.ofs); - } + cbuf_puts(&t->body, ")("); + c_emit_indirect_addr_expr(t, c_addr_mode(lv)); cbuf_puts(&t->body, "))"); return; } @@ -2606,6 +2644,8 @@ void c_va_arg(CGTarget* T, Operand dst, Operand ap_addr, CfreeCgTypeId ty) { void c_copy_bytes(CGTarget* T, Operand dst_addr, Operand src_addr, AggregateAccess m) { CTarget* t = (CTarget*)T; + c_assert_no_index(t, dst_addr, "copy_bytes dst"); + c_assert_no_index(t, src_addr, "copy_bytes src"); cbuf_puts(&t->body, " __builtin_memcpy("); c_emit_operand(t, dst_addr); cbuf_puts(&t->body, ", "); @@ -2618,6 +2658,7 @@ void c_copy_bytes(CGTarget* T, Operand dst_addr, Operand src_addr, void c_set_bytes(CGTarget* T, Operand dst_addr, Operand byte_value, AggregateAccess m) { CTarget* t = (CTarget*)T; + c_assert_no_index(t, dst_addr, "set_bytes dst"); cbuf_puts(&t->body, " __builtin_memset("); c_emit_operand(t, dst_addr); cbuf_puts(&t->body, ", (int)"); @@ -2700,6 +2741,7 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, if (dst.kind != OPK_REG) { compiler_panic(t->c, loc, "C target: bitfield_load dst must be REG"); } + c_assert_no_index(t, addr, "bitfield_load"); if (bf.bit_width == 0) { /* Zero-width — layout barrier only; nothing to load. Emit a no-op * assignment so the dst reg still gets a defined value. */ @@ -2771,6 +2813,7 @@ void c_bitfield_store(CGTarget* T, Operand addr, Operand src, BitFieldAccess bf) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + c_assert_no_index(t, addr, "bitfield_store"); if (bf.bit_width == 0) return; /* zero-width: no-op */ const char* sty = c_bf_storage_type(bf.storage.size); if (!sty) { @@ -2860,6 +2903,8 @@ void c_asm_block(CGTarget* T, const char* tmpl, const AsmConstraint* outs, u32 no, Operand* oo, const AsmConstraint* ins, u32 ni, const Operand* io, const Sym* clobs, u32 nc) { CTarget* t = (CTarget*)T; + for (u32 i = 0; i < no; ++i) c_assert_no_index(t, oo[i], "asm_block out"); + for (u32 i = 0; i < ni; ++i) c_assert_no_index(t, io[i], "asm_block in"); cbuf_puts(&t->body, " __asm__ __volatile__ ("); c_emit_c_string_literal(&t->body, tmpl ? tmpl : ""); /* Outputs. */ @@ -2992,6 +3037,7 @@ void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m, MemOrder o) { CTarget* t = (CTarget*)T; (void)m; + c_assert_no_index(t, addr, "atomic_load"); c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); /* __atomic_load_n returns a value of the pointed-to type (dst.type). */ c_emit_reg_assign_open(t, dst.v.reg, dst.type); @@ -3009,6 +3055,7 @@ void c_atomic_store(CGTarget* T, Operand addr, Operand src, MemAccess m, MemOrder o) { CTarget* t = (CTarget*)T; (void)m; + c_assert_no_index(t, addr, "atomic_store"); cbuf_puts(&t->body, " __atomic_store_n(("); c_emit_type(t, &t->body, src.type); cbuf_puts(&t->body, "*)"); @@ -3038,6 +3085,7 @@ void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr, CTarget* t = (CTarget*)T; (void)m; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + c_assert_no_index(t, addr, "atomic_rmw"); const char* fn = c_atomic_op_builtin(op); if (!fn) { compiler_panic(t->c, loc, "C target: unknown atomic op %d", (int)op); @@ -3063,6 +3111,7 @@ void c_atomic_cas(CGTarget* T, Operand prior, Operand ok, Operand addr, MemOrder so, MemOrder fo) { CTarget* t = (CTarget*)T; (void)m; + c_assert_no_index(t, addr, "atomic_cas"); /* gcc's __atomic_compare_exchange_n needs a real lvalue holding the * expected value (it's updated on failure). We can't use `&prior_reg` * directly because CG reuses reg ids across types — the C declaration may diff --git a/src/arch/cgtarget.c b/src/arch/cgtarget.c @@ -3,7 +3,10 @@ * The lifecycle helpers (cgtarget_finalize, cgtarget_free) are arch-agnostic * shims over the vtable. */ +#include <string.h> + #include "arch/arch.h" +#include "cg/type.h" CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { const ArchImpl* arch = arch_for_compiler(c); @@ -25,3 +28,77 @@ void cgtarget_free(CGTarget* t) { if (!t) return; /* Arena-backed; nothing to free. */ } + +/* Default fold for backends without a native indexed addressing mode. + * + * If `addr` carries an index register (addr.v.ind.index != REG_NONE), + * materialize `base + (index << log2_scale)` into `scratch` and return a plain + * OPK_INDIRECT(scratch, ofs) with `index == REG_NONE`. Otherwise return `addr` + * unchanged. + * + * The caller supplies `scratch` from its own scratch pool and is responsible + * for freeing it after the memop completes. The returned operand keeps the + * caller's `type` so the backend's load/store sees the same access type. 
*/ +Operand arch_lower_indexed(CGTarget* t, Operand addr, Reg scratch) { + Operand scratch_op; + Operand shifted; + CfreeCgTypeId ty; + u8 log2; + + if (addr.kind != OPK_INDIRECT || addr.v.ind.index == REG_NONE) return addr; + + ty = builtin_id(CFREE_CG_BUILTIN_I64); + log2 = addr.v.ind.log2_scale; + scratch_op.kind = OPK_REG; + scratch_op.cls = RC_INT; + scratch_op.pad = 0; + scratch_op.type = ty; + scratch_op.v.reg = scratch; + + shifted.kind = OPK_REG; + shifted.cls = RC_INT; + shifted.pad = 0; + shifted.type = ty; + shifted.v.reg = addr.v.ind.index; + + if (log2 == 0) { + /* index * 1: just add the index directly to the base. */ + Operand base_op; + base_op.kind = OPK_REG; + base_op.cls = RC_INT; + base_op.pad = 0; + base_op.type = ty; + base_op.v.reg = addr.v.ind.base; + t->binop(t, BO_IADD, scratch_op, base_op, shifted); + } else { + /* scratch = index << log2 */ + Operand shamt; + Operand base_op; + shamt.kind = OPK_IMM; + shamt.cls = RC_INT; + shamt.pad = 0; + shamt.type = ty; + shamt.v.imm = (i64)log2; + t->binop(t, BO_SHL, scratch_op, shifted, shamt); + base_op.kind = OPK_REG; + base_op.cls = RC_INT; + base_op.pad = 0; + base_op.type = ty; + base_op.v.reg = addr.v.ind.base; + /* scratch = base + scratch */ + t->binop(t, BO_IADD, scratch_op, base_op, scratch_op); + } + + { + Operand out; + memset(&out, 0, sizeof out); + out.kind = OPK_INDIRECT; + out.cls = RC_INT; + out.type = ty; + out.v.ind.base = scratch; + out.v.ind.index = REG_NONE; + out.v.ind.log2_scale = 0; + out.v.ind.ofs = addr.v.ind.ofs; + return out; + } +} diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c @@ -398,7 +398,12 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { rs1 = parse_xreg(d); return enc_i(m, 0u, rs1, 0); } - rd = parse_xreg(d); expect_comma(d); + rd = parse_xreg(d); + if (!asm_driver_eat_comma(d)) { + if (!strcmp(desc->mnemonic, "jalr")) + return enc_i(m, RV_RA, rd, 0); + asm_driver_panic(d, "rv64 asm: expected ','"); + } /* Accept both `jalr rd, 
imm(rs1)` and `jalr rd, rs1, imm`. */ { AsmTok t = asm_driver_peek(d); @@ -732,6 +737,9 @@ static void render_operand(Rv64Asm* a, StrBuf* sb, u32 idx, int form) { case 3: /* %aN — memory addressing form */ if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); + if (op->v.ind.index != REG_NONE) + inline_panic(a, "%a on indexed memory operand: rv64 inline asm " + "requires base+disp only"); render_indirect(a, sb, op->v.ind.base, op->v.ind.ofs); return; case 4: /* %zN — zero-or-reg */ @@ -757,6 +765,9 @@ static void render_operand(Rv64Asm* a, StrBuf* sb, u32 idx, int form) { render_imm(sb, op->v.imm); return; case OPK_INDIRECT: + if (op->v.ind.index != REG_NONE) + inline_panic(a, "indexed memory operand in inline asm: rv64 requires " + "base+disp only"); render_indirect(a, sb, op->v.ind.base, op->v.ind.ofs); return; default: diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -166,7 +166,21 @@ void rv_load(CGTarget *t, Operand dst, Operand addr, MemAccess ma); void rv_store(CGTarget *t, Operand addr, Operand src, MemAccess ma); u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off); u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off); -u32 addr_base(CGTarget *t, Operand addr, i32 *out_off, u32 tmp_reg); + +/* Effective-address tuple returned by addr_mode: `base + (index << log2_scale) + * + ofs`, where `index == REG_NONE` means no index operand. rv64 has no + * indexed load/store instructions even with Zba, so load/store fold any + * index into a scratch register up front via Zba `sh{1,2,3}add` (see + * rv_fold_indexed in ops.c); other paths (atomics, spill/reload, ...) + * assert that input OPK_INDIRECT operands already have `index == REG_NONE`. 
*/ +typedef struct RvAddrMode { + u32 base; + u32 index; + u8 log2_scale; + i32 ofs; +} RvAddrMode; + +RvAddrMode addr_mode(CGTarget *t, Operand addr, u32 tmp_reg); void rv64_emit_addr_adjust(MCEmitter *mc, u32 rd, u32 base, i32 off); ObjSymId emit_pcrel_anchor(CGTarget *t, u32 sec, u32 auipc_pos); void rv64_emit_got_load_addr(CGTarget *t, u32 dst_reg, ObjSymId sym); diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h @@ -171,6 +171,15 @@ static inline u32 rv_divuw(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x01, rs2, r static inline u32 rv_remw(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x01, rs2, rs1, 0x6, rd, RV_OP_32); } static inline u32 rv_remuw(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x01, rs2, rs1, 0x7, rd, RV_OP_32); } +/* Zba (address-generation) subset — assumed available on rv64 targets. + * SH{1,2,3}ADD rd, rs1, rs2 computes rd = (rs1 << {1,2,3}) + rs2 in one + * instruction (funct7=0x10, opcode=OP). Used by load/store to fold an + * indexed effective address `base + (index << log2_scale)` into a single + * scratch register without an explicit shift+add pair. */ +static inline u32 rv_sh1add(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x10, rs2, rs1, 0x2, rd, RV_OP); } +static inline u32 rv_sh2add(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x10, rs2, rs1, 0x4, rd, RV_OP); } +static inline u32 rv_sh3add(u32 rd, u32 rs1, u32 rs2) { return rv_r(0x10, rs2, rs1, 0x6, rd, RV_OP); } + /* Loads (funct3: 0=LB,1=LH,2=LW,3=LD,4=LBU,5=LHU,6=LWU) */ static inline u32 rv_lb(u32 rd, u32 rs1, i32 imm) { return rv_i(imm, rs1, 0x0, rd, RV_LOAD); } static inline u32 rv_lh(u32 rd, u32 rs1, i32 imm) { return rv_i(imm, rs1, 0x1, rd, RV_LOAD); } diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -129,38 +129,57 @@ static void rv_copy(CGTarget* t, Operand dst, Operand src) { /* ---- address resolution ---- */ -/* Materialize the address of `addr` (LOCAL or INDIRECT or GLOBAL) into - * `tmp_reg`. 
Returns the register holding the base and writes the - * effective signed offset to *out_off (0 when we synthesized into tmp). - * For OPK_GLOBAL, emits AUIPC + an LO12 reloc on the caller's load/store. */ -u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { +/* Materialize the address of `addr` (LOCAL or INDIRECT) into a + * base-register + signed-offset pair, possibly using `tmp_reg` when the + * raw offset exceeds the imm[11:0] range. The returned tuple carries an + * optional index (`REG_NONE` for "no index"); rv64 has no indexed loads + * or stores even with Zba, so callers must have already folded any index + * away (load/store do this via rv_fold_indexed). OPK_GLOBAL is not + * handled here — its callers emit AUIPC + an LO12 reloc on the load/store + * directly. */ +RvAddrMode addr_mode(CGTarget* t, Operand addr, u32 tmp_reg) { RImpl* a = impl_of(t); + RvAddrMode am = {0}; + am.index = REG_NONE; if (addr.kind == OPK_LOCAL) { RvSlot* s = rv64_slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 addr_base: bad slot"); + if (!s) compiler_panic(t->c, a->loc, "rv64 addr_mode: bad slot"); i32 off = -(i32)s->off; if (off >= -2048 && off <= 2047) { - *out_off = off; - return RV_S0; + am.base = RV_S0; + am.ofs = off; + return am; } rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)off); rv64_emit32(t->mc, rv_add(tmp_reg, RV_S0, tmp_reg)); - *out_off = 0; - return tmp_reg; + am.base = tmp_reg; + am.ofs = 0; + return am; } if (addr.kind == OPK_INDIRECT) { + /* This helper does not encode an index — rv64 has no indexed + * load/store even with Zba. Load/store fold the index via + * rv_fold_indexed before calling here; all other paths take + * pointer-only operands. 
*/ + if (addr.v.ind.index != REG_NONE) { + compiler_panic(t->c, a->loc, + "rv64 addr_mode: indexed addressing not supported here " + "(caller must fold via rv_fold_indexed)"); + } i32 off = addr.v.ind.ofs; u32 base = addr.v.ind.base & 0x1f; if (off >= -2048 && off <= 2047) { - *out_off = off; - return base; + am.base = base; + am.ofs = off; + return am; } rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)off); rv64_emit32(t->mc, rv_add(tmp_reg, base, tmp_reg)); - *out_off = 0; - return tmp_reg; + am.base = tmp_reg; + am.ofs = 0; + return am; } - compiler_panic(t->c, a->loc, "rv64 addr_base: kind %d unsupported", + compiler_panic(t->c, a->loc, "rv64 addr_mode: kind %d unsupported", (int)addr.kind); } @@ -219,6 +238,33 @@ void rv64_emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) { rv64_emit32(mc, rv_add(rd, base, RV_T1)); } +/* Fold an indexed OPK_INDIRECT into a plain base+disp by emitting one Zba + * `sh{1,2,3}add` (or a plain `add` when log2_scale == 0) into `scratch`. + * Returns an OPK_INDIRECT(scratch, ofs) with `index = REG_NONE`. When the + * input has no index the operand is returned unchanged. Zba is assumed + * available on rv64 targets — no feature gate. */ +static Operand rv_fold_indexed(CGTarget* t, Operand addr, u32 scratch) { + if (addr.kind != OPK_INDIRECT || addr.v.ind.index == REG_NONE) return addr; + u32 base = addr.v.ind.base & 0x1fu; + u32 index = addr.v.ind.index & 0x1fu; + u8 s = addr.v.ind.log2_scale; + MCEmitter* mc = t->mc; + /* sh{1,2,3}add rd, rs1, rs2 = (rs1 << s) + rs2, so rs1=index, rs2=base. 
*/ + switch (s) { + case 0: rv64_emit32(mc, rv_add (scratch, base, index)); break; + case 1: rv64_emit32(mc, rv_sh1add(scratch, index, base )); break; + case 2: rv64_emit32(mc, rv_sh2add(scratch, index, base )); break; + case 3: rv64_emit32(mc, rv_sh3add(scratch, index, base )); break; + default: compiler_panic(t->c, impl_of(t)->loc, + "rv64 rv_fold_indexed: bad log2_scale %u", + (u32)s); + } + addr.v.ind.base = scratch; + addr.v.ind.index = REG_NONE; + addr.v.ind.log2_scale = 0; + return addr; +} + void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { u32 sz = ma.size ? ma.size : type_byte_size(addr.type); MCEmitter* mc = t->mc; @@ -259,14 +305,16 @@ void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { return; } - i32 off; - u32 base = addr_base(t, addr, &off, RV_T0); + /* Fold any index via Zba sh{1,2,3}add into RV_T0 first; addr_mode then + * sees a plain base+disp. */ + addr = rv_fold_indexed(t, addr, RV_T0); + RvAddrMode am = addr_mode(t, addr, RV_T0); if (dst.cls == RC_FP) { - if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), base, off)); - else rv64_emit32(mc, rv_flw(reg_num(dst), base, off)); + if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), am.base, am.ofs)); + else rv64_emit32(mc, rv_flw(reg_num(dst), am.base, am.ofs)); } else { int sx = type_is_signed(addr.type); - rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), base, off)); + rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), am.base, am.ofs)); } } @@ -318,20 +366,24 @@ void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { return; } - i32 off; - u32 base = addr_base(t, addr, &off, - (src.kind == OPK_IMM) ? RV_T1 : RV_T0); + /* Fold any index into a scratch via Zba sh{1,2,3}add. RV_T0 stays free + * for the IMM-src temporary in the OPK_IMM branch below, so route the + * fold scratch to RV_T1 in that case; the index-fold scratch matches + * addr_mode's tmp_reg. */ + u32 addr_tmp = (src.kind == OPK_IMM) ? 
RV_T1 : RV_T0; + addr = rv_fold_indexed(t, addr, addr_tmp); + RvAddrMode am = addr_mode(t, addr, addr_tmp); if (src.kind == OPK_IMM) { u32 sf = (sz == 8) ? 1u : 0u; rv64_emit_load_imm(mc, sf, RV_T0, src.v.imm); - rv64_emit32(mc, enc_int_store(sz, RV_T0, base, off)); + rv64_emit32(mc, enc_int_store(sz, RV_T0, am.base, am.ofs)); return; } if (src.cls == RC_FP) { - if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(src), base, off)); - else rv64_emit32(mc, rv_fsw(reg_num(src), base, off)); + if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(src), am.base, am.ofs)); + else rv64_emit32(mc, rv_fsw(reg_num(src), am.base, am.ofs)); } else { - rv64_emit32(mc, enc_int_store(sz, reg_num(src), base, off)); + rv64_emit32(mc, enc_int_store(sz, reg_num(src), am.base, am.ofs)); } } @@ -352,6 +404,10 @@ static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) { return; } if (lv.kind == OPK_INDIRECT) { + if (lv.v.ind.index != REG_NONE) { + compiler_panic(t->c, a->loc, + "rv64 addr_of: indexed INDIRECT not supported"); + } i32 ofs = lv.v.ind.ofs; u32 base = lv.v.ind.base & 0x1f; if (ofs >= -2048 && ofs <= 2047) { @@ -838,6 +894,8 @@ static Operand rv_call_stack_arg_addr(CGTarget* t, u32 stack_offset, addr.kind = OPK_INDIRECT; addr.cls = RC_INT; addr.v.ind.base = tail && !a->omit_frame ? RV_S0 : RV_SP; + addr.v.ind.index = REG_NONE; + addr.v.ind.log2_scale = 0; addr.v.ind.ofs = (i32)stack_offset; if (tail && !a->omit_frame) { addr.v.ind.ofs += 16 + (a->is_variadic ? 
64 : 0); @@ -895,6 +953,8 @@ static Operand rv_offset_mem_operand(CGTarget* t, Operand op, u32 offset) { if (!s) compiler_panic(t->c, a->loc, "rv64 offset operand: bad slot"); op.kind = OPK_INDIRECT; op.v.ind.base = RV_S0; + op.v.ind.index = REG_NONE; + op.v.ind.log2_scale = 0; op.v.ind.ofs = -(i32)s->off + (i32)offset; } return op; @@ -961,6 +1021,10 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, rv64_emit32(mc, rv_add(dst_reg, RV_S0, dst_reg)); } } else if (av->storage.kind == OPK_INDIRECT) { + if (av->storage.v.ind.index != REG_NONE) { + compiler_panic(t->c, a->loc, + "rv64 call byval: indexed storage not supported"); + } u32 base = av->storage.v.ind.base & 0x1fu; i32 off = av->storage.v.ind.ofs; if (off >= -2048 && off <= 2047) { @@ -1395,6 +1459,8 @@ static Operand rv_call_plan_offset_operand(CGTarget* t, Operand op, if (!s) compiler_panic(t->c, a->loc, "rv64 call plan: bad slot"); op.kind = OPK_INDIRECT; op.v.ind.base = RV_S0; + op.v.ind.index = REG_NONE; + op.v.ind.log2_scale = 0; op.v.ind.ofs = -(i32)s->off + (i32)offset; } return op; @@ -1476,6 +1542,10 @@ static void rv_ret(CGTarget* t, const CGABIValue* val) { src_base_off = -(i32)s->off; nbytes = s->size; } else if (val->storage.kind == OPK_INDIRECT) { + if (val->storage.v.ind.index != REG_NONE) { + compiler_panic(t->c, a->loc, + "rv64 ret indirect: indexed storage not supported"); + } src_base = val->storage.v.ind.base & 0x1fu; src_base_off = val->storage.v.ind.ofs; nbytes = val->size; @@ -1685,10 +1755,10 @@ static void rv_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, if (addr.kind == OPK_REG) { base = reg_num(addr); } else if (addr.kind == OPK_LOCAL) { - i32 off; - base = addr_base(t, addr, &off, RV_T0); - if (off) { - rv64_emit32(mc, rv_addi(RV_T0, base, off)); + RvAddrMode am = addr_mode(t, addr, RV_T0); + base = am.base; + if (am.ofs) { + rv64_emit32(mc, rv_addi(RV_T0, base, am.ofs)); base = RV_T0; } } else { @@ -1722,9 +1792,9 @@ static void 
rv_atomic_store(CGTarget* t, Operand addr, Operand src, if (addr.kind == OPK_REG) { base = reg_num(addr); } else if (addr.kind == OPK_LOCAL) { - i32 off; - base = addr_base(t, addr, &off, RV_T0); - if (off) { rv64_emit32(mc, rv_addi(RV_T0, base, off)); base = RV_T0; } + RvAddrMode am = addr_mode(t, addr, RV_T0); + base = am.base; + if (am.ofs) { rv64_emit32(mc, rv_addi(RV_T0, base, am.ofs)); base = RV_T0; } } else { compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: addr kind %d NYI", (int)addr.kind); @@ -1747,10 +1817,9 @@ static void rv_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, if (addr.kind == OPK_REG) { rv64_emit32(mc, rv_addi(base, reg_num(addr), 0)); } else if (addr.kind == OPK_LOCAL) { - i32 off; - u32 b = addr_base(t, addr, &off, RV_T0); - if (b != RV_T0 || off) { - rv64_emit32(mc, rv_addi(base, b, off)); + RvAddrMode am = addr_mode(t, addr, RV_T0); + if (am.base != RV_T0 || am.ofs) { + rv64_emit32(mc, rv_addi(base, am.base, am.ofs)); } } else { compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: addr NYI"); @@ -1799,8 +1868,8 @@ static void rv_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, u32 base = RV_T0; if (addr.kind == OPK_REG) rv64_emit32(mc, rv_addi(base, reg_num(addr), 0)); else if (addr.kind == OPK_LOCAL) { - i32 off; u32 b = addr_base(t, addr, &off, RV_T0); - if (b != RV_T0 || off) rv64_emit32(mc, rv_addi(base, b, off)); + RvAddrMode am = addr_mode(t, addr, RV_T0); + if (am.base != RV_T0 || am.ofs) rv64_emit32(mc, rv_addi(base, am.base, am.ofs)); } else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_cas: addr NYI"); u32 ereg = RV_T1, dreg = RV_T2; if (exp.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, ereg, exp.v.imm); diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c @@ -1421,6 +1421,8 @@ static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) { op = (idx < a->nout) ? 
&a->out_ops[idx] : &a->in_ops[idx - a->nout]; if (form == X64_FORM_A) { if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); + if (op->v.ind.index != REG_NONE) + inline_panic(a, "inline asm: indexed addressing not supported"); render_indirect(sb, op->v.ind.base, op->v.ind.ofs); return; } @@ -1448,6 +1450,8 @@ static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) { return; } if (op->kind == OPK_INDIRECT) { + if (op->v.ind.index != REG_NONE) + inline_panic(a, "inline asm: indexed addressing not supported"); render_indirect(sb, op->v.ind.base, op->v.ind.ofs); return; } diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -246,6 +246,81 @@ void emit_lea(MCEmitter *mc, u32 dst, u32 base, i32 disp) { debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); } +/* Common low-level emit for `[base + index<<log2_scale + disp]` GPR/SSE + * memory operands. Builds REX with index, opcode(s), and SIB by hand + * (the existing `*_pack` helpers route through `x64_pack_mem`, which + * forces SIB index = 4 (none)). */ +static void emit_mem_idx_op(MCEmitter *mc, u8 prefix, int w, int force_rex, + u8 opc0, u8 opc1, u32 reg, u32 base, u32 index, + u32 log2_scale, i32 disp) { + u8 buf[16]; + u32 n = 0; + if (prefix) buf[n++] = prefix; + if (force_rex) + n += x64_pack_rex_force(buf + n, w, reg, index, base); + else + n += x64_pack_rex(buf + n, w, reg, index, base); + if (opc1) { + buf[n++] = X64_OPC_TWOBYTE; + buf[n++] = opc1; + } else { + buf[n++] = opc0; + } + n += x64_pack_mem_sib(buf + n, reg, base, index, log2_scale, disp); + mc->emit_bytes(mc, buf, n); +} + +/* mov reg, [base + index<<log2_scale + disp]; size 1/2/4/8. 
*/ +void emit_mov_load_idx(MCEmitter *mc, u32 size, int signed_ext, u32 dst, + u32 base, u32 index, u32 log2_scale, i32 disp) { + if (index == REG_NONE) { + emit_mov_load(mc, size, signed_ext, dst, base, disp); + return; + } + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (size == 8) { + emit_mem_idx_op(mc, 0, 1, 0, X64_OPC_MOV_R_RM, 0, dst, base, index & 0xFu, + log2_scale, disp); + } else if (size == 4) { + emit_mem_idx_op(mc, 0, 0, 0, X64_OPC_MOV_R_RM, 0, dst, base, index & 0xFu, + log2_scale, disp); + } else if (size == 2) { + emit_mem_idx_op(mc, 0, 0, 0, 0, + signed_ext ? X64_OPC_MOVSX_W : X64_OPC_MOVZX_W, dst, base, + index & 0xFu, log2_scale, disp); + } else if (size == 1) { + emit_mem_idx_op(mc, 0, 0, 0, 0, + signed_ext ? X64_OPC_MOVSX_B : X64_OPC_MOVZX_B, dst, base, + index & 0xFu, log2_scale, disp); + } + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* mov [base + index<<log2_scale + disp], src; size 1/2/4/8. */ +void emit_mov_store_idx(MCEmitter *mc, u32 size, u32 src, u32 base, u32 index, + u32 log2_scale, i32 disp) { + if (index == REG_NONE) { + emit_mov_store(mc, size, src, base, disp); + return; + } + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (size == 8) { + emit_mem_idx_op(mc, 0, 1, 0, X64_OPC_MOV_RM_R, 0, src, base, index & 0xFu, + log2_scale, disp); + } else if (size == 4) { + emit_mem_idx_op(mc, 0, 0, 0, X64_OPC_MOV_RM_R, 0, src, base, index & 0xFu, + log2_scale, disp); + } else if (size == 2) { + emit_mem_idx_op(mc, X64_OPSIZE_PFX, 0, 0, X64_OPC_MOV_RM_R, 0, src, base, + index & 0xFu, log2_scale, disp); + } else if (size == 1) { + /* Force REX so SIL/DIL/etc are addressable as byte regs. */ + emit_mem_idx_op(mc, 0, 0, 1, X64_OPC_MOV_RM_R8, 0, src, base, index & 0xFu, + log2_scale, disp); + } + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + /* movabs reg, imm64 (REX.W + B8+r imm64) for is64; mov r32, imm32 (B8+r * imm32) for !is64. Both 10/5 bytes. 
*/ void x64_emit_load_imm(MCEmitter *mc, int is64, u32 dst, i64 imm) { @@ -500,6 +575,28 @@ void emit_sse_store(MCEmitter *mc, u8 prefix, u8 opcode, u32 src, u32 base, if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); } +void emit_sse_load_idx(MCEmitter *mc, u8 prefix, u8 opcode, u32 dst, u32 base, + u32 index, u32 log2_scale, i32 disp) { + if (index == REG_NONE) { + emit_sse_load(mc, prefix, opcode, dst, base, disp); + return; + } + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_mem_idx_op(mc, prefix, 0, 0, 0, opcode, dst, base, index & 0xFu, + log2_scale, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +void emit_sse_store_idx(MCEmitter *mc, u8 prefix, u8 opcode, u32 src, u32 base, + u32 index, u32 log2_scale, i32 disp) { + if (index == REG_NONE) { + emit_sse_store(mc, prefix, opcode, src, base, disp); + return; + } + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_mem_idx_op(mc, prefix, 0, 0, 0, opcode, src, base, index & 0xFu, + log2_scale, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst, u32 src) { u32 ofs = obj_pos(mc->obj, mc->section_id); diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -244,6 +244,12 @@ void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, u32 base, i32 disp); void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp); void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp); +/* Indexed-addressing variants: [base + index<<log2_scale + disp]. Pass + * index = REG_NONE to fall back to the plain [base + disp] encoding. 
*/ +void emit_mov_load_idx(MCEmitter* mc, u32 size, int signed_ext, u32 dst, + u32 base, u32 index, u32 log2_scale, i32 disp); +void emit_mov_store_idx(MCEmitter* mc, u32 size, u32 src, u32 base, u32 index, + u32 log2_scale, i32 disp); void emit_ret(MCEmitter* mc); void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm); void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src); @@ -270,6 +276,10 @@ void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base, i32 disp); void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base, i32 disp); +void emit_sse_load_idx(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base, + u32 index, u32 log2_scale, i32 disp); +void emit_sse_store_idx(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base, + u32 index, u32 log2_scale, i32 disp); void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, u32 src); diff --git a/src/arch/x64/isa.h b/src/arch/x64/isa.h @@ -431,6 +431,38 @@ static inline u32 x64_pack_mem(u8* out, u32 reg, u32 base, i32 disp) { return n; } +/* Pack a SIB-form memory operand `[base + index*scale + disp]`. Emits SIB + * unconditionally; pass index = 4 (RSP) for the no-index case (the SIB + * "no index" encoding). `log2_scale` ∈ {0,1,2,3} for byte scale 1/2/4/8. + * + * RBP/R13 base needs at least disp8 even when disp == 0 (mod=00 with + * SIB base=5 means "no base, disp32 only"). RSP/R12 base requires SIB + * regardless — which is what this helper provides. */ +static inline u32 x64_pack_mem_sib(u8* out, u32 reg, u32 base, u32 index, + u32 log2_scale, i32 disp) { + /* For SIB base encoding, base=5 (RBP/R13) cannot use mod=0; force + * disp8/disp32. Other bases can use the standard mod selection. 
*/ + u32 m; + if ((base & 7u) == 5u && disp == 0) { + m = 1u; /* disp8 = 0 */ + } else if (disp == 0) { + m = 0u; + } else if (disp >= -128 && disp <= 127) { + m = 1u; + } else { + m = 2u; + } + u32 n = 0; + out[n++] = x64_modrm(m, reg, 4u); /* r/m = 4 → SIB follows */ + out[n++] = x64_sib(log2_scale & 3u, index, base); + if (m == 1u) { + out[n++] = (u8)(i8)disp; + } else if (m == 2u) { + n += x64_put_u32le(out + n, (u32)disp); + } + return n; +} + /* Pack a reg-form ModR/M (mod=3) — one byte. */ static inline u32 x64_pack_rm_reg(u8* out, u32 reg, u32 rm) { out[0] = x64_modrm(3u, reg, rm); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -106,22 +106,47 @@ static void x_copy(CGTarget* t, Operand dst, Operand src) { emit_mov_rr(t->mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu); } -static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) { +/* Resolve an addr operand to the full effective-address tuple + * (base, index, log2_scale, ofs). `OPK_LOCAL` resolves to its RBP-relative + * slot offset with no index. `OPK_INDIRECT` carries the EA verbatim: + * `index == REG_NONE` for plain base+disp, otherwise the SIB scaled-index + * form (`log2_scale ∈ {0,1,2,3}` for byte scale 1/2/4/8). */ +static u32 addr_mode(CGTarget* t, Operand addr, u32* out_index, + u32* out_log2_scale, i32* out_off) { XImpl* a = impl_of(t); if (addr.kind == OPK_LOCAL) { XSlot* s = x64_slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 addr_base: bad slot"); + if (!s) compiler_panic(t->c, a->loc, "x64 addr_mode: bad slot"); + *out_index = REG_NONE; + *out_log2_scale = 0; *out_off = -(i32)s->off; return X64_RBP; } if (addr.kind == OPK_INDIRECT) { + *out_index = (addr.v.ind.index == REG_NONE) ? 
REG_NONE + : (addr.v.ind.index & 0xFu); + *out_log2_scale = addr.v.ind.log2_scale; *out_off = addr.v.ind.ofs; return addr.v.ind.base & 0xFu; } - compiler_panic(t->c, a->loc, "x64 addr_base: kind %d unsupported", + compiler_panic(t->c, a->loc, "x64 addr_mode: kind %d unsupported", (int)addr.kind); } +/* Plain-base+disp accessor for non-load/store paths (atomics, calls, + * spill/reload, copy_bytes/set_bytes, inline asm). Per the EA contract, + * those paths always see `index == REG_NONE`; assert that here so any + * regression is caught at the boundary. */ +static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) { + u32 idx, ls; + u32 base = addr_mode(t, addr, &idx, &ls, out_off); + if (idx != REG_NONE) { + compiler_panic(t->c, impl_of(t)->loc, + "x64 addr_base: indexed addr in non-load/store path"); + } + return base; +} + static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) { return obj_symbol_extern_via_got(t->c, t->obj, sym); } @@ -297,13 +322,15 @@ void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { } i32 off; - u32 base = addr_base(t, addr, &off); + u32 idx, ls; + u32 base = addr_mode(t, addr, &idx, &ls, &off); if (dst.cls == RC_FP) { u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_load(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, base, off); + emit_sse_load_idx(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, base, idx, ls, + off); } else { int signed_ = type_is_signed(ma.type ? ma.type : addr.type); - emit_mov_load(t->mc, sz, signed_, dst.v.reg & 0xFu, base, off); + emit_mov_load_idx(t->mc, sz, signed_, dst.v.reg & 0xFu, base, idx, ls, off); } } @@ -348,20 +375,22 @@ void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { } i32 off; - u32 base = addr_base(t, addr, &off); + u32 idx, ls; + u32 base = addr_mode(t, addr, &idx, &ls, &off); if (src.kind == OPK_IMM) { int w = (sz == 8) ? 
1 : 0; x64_emit_load_imm(t->mc, w, X64_RAX, src.v.imm); - emit_mov_store(t->mc, sz, X64_RAX, base, off); + emit_mov_store_idx(t->mc, sz, X64_RAX, base, idx, ls, off); return; } if (src.cls == RC_FP) { u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, off); + emit_sse_store_idx(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, idx, ls, + off); return; } - emit_mov_store(t->mc, sz, src.v.reg & 0xFu, base, off); + emit_mov_store_idx(t->mc, sz, src.v.reg & 0xFu, base, idx, ls, off); } static void x_addr_of(CGTarget* t, Operand dst, Operand lv) { @@ -373,6 +402,9 @@ static void x_addr_of(CGTarget* t, Operand dst, Operand lv) { return; } if (lv.kind == OPK_INDIRECT) { + if (lv.v.ind.index != REG_NONE) { + x_panic(t, "addr_of: indexed INDIRECT lvalue (cg should fold)"); + } emit_lea(t->mc, dst.v.reg & 0xFu, lv.v.ind.base & 0xFu, lv.v.ind.ofs); return; } @@ -1102,6 +1134,8 @@ static Operand x_call_stack_arg_addr(CGTarget* t, u32 stack_offset, int tail) { addr.kind = OPK_INDIRECT; addr.cls = RC_INT; addr.v.ind.base = tail && !a->omit_frame ? X64_RBP : X64_RSP; + addr.v.ind.index = REG_NONE; + addr.v.ind.log2_scale = 0; addr.v.ind.ofs = (i32)stack_offset + (tail ? 8 : 0); if (tail && !a->omit_frame) addr.v.ind.ofs = 16 + (i32)stack_offset; return addr; diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -357,14 +357,26 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { ma.type = i64_ty; ma.size = 8; ma.align = 8; - if (sz < 8) { + if (sz < 8 && ck == CV_SEXT) { low_tmp = api_alloc_reg_or_spill(g, RC_INT, i64_ty); low = api_op_reg(low_tmp, i64_ty); - T->convert(T, ck == CV_SEXT ? 
CV_SEXT : CV_ZEXT, low, src); + T->convert(T, CV_SEXT, low, src); src_ty = i64_ty; } else { low.type = i64_ty; } + if (ck != CV_SEXT && !g->c->target.big_endian) { + T->store(T, dst_lv, low, ma); + if (low_tmp != REG_NONE) api_free_reg(g, low_tmp, RC_INT); + api_release(g, &v); + ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); + base = api_op_reg(ar, ptr_ty); + T->addr_of(T, base, dst_lv); + T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma); + api_free_reg(g, ar, RC_INT); + api_push(g, api_make_lv(dst_lv, dty)); + return; + } ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); base = api_op_reg(ar, ptr_ty); T->addr_of(T, base, dst_lv); @@ -397,6 +409,9 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { T->load(T, dst, lv.op, api_mem_for_lvalue(g, &lv.op, dty)); } else if (v.op.kind == OPK_IMM) { T->load_imm(T, dst, v.op.v.imm); + } else if (v.op.kind == OPK_REG) { + Operand src_addr = api_op_indirect(v.op.v.reg, 0, dty); + T->load(T, dst, src_addr, api_mem_for_lvalue(g, &src_addr, dty)); } else { compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported i128 truncation source"); diff --git a/src/cg/call.c b/src/cg/call.c @@ -30,6 +30,9 @@ void api_pack_call_arg(CfreeCg* g, CGABIValue* av, CfreeCgTypeId fty, } else if (cg_type_is_aggregate(g->c, aty)) { api_ensure_reg(g, &arg); Operand st = arg.op; + if (!api_is_lvalue_sv(&arg) && st.kind == OPK_REG) { + st = api_op_indirect(st.v.reg, 0, aty); + } st.type = aty; av->storage = st; av->size = abi_cg_sizeof(g->c->abi, aty); @@ -335,7 +338,11 @@ void cfree_cg_ret(CfreeCg* g) { av.abi = &g->fn_abi->ret; int is_aggregate = cg_type_is_aggregate(g->c, rty); if (is_aggregate) { + api_ensure_reg(g, &v); av.storage = v.op; + if (!api_is_lvalue_sv(&v) && av.storage.kind == OPK_REG) { + av.storage = api_op_indirect(av.storage.v.reg, 0, rty); + } av.storage.type = rty; av.size = abi_cg_sizeof(g->c->abi, rty); T->ret(T, &av); diff --git a/src/cg/control.c b/src/cg/control.c @@ -258,14 
+258,19 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, decl.as.object.flags = CFREE_CG_OBJ_READONLY; api_remember_sym(g, table_sym, arr_ty, decl); - /* 6. Compute &table[idx] and load the label address. */ - cfree_cg_push_symbol_lvalue(g, (CfreeCgSym)table_sym, 0); /* [idx, table_lv] */ - cfree_cg_swap(g); /* [table_lv, idx] */ - cfree_cg_index(g, 0); /* [&table[idx]] */ + /* 6. Load table[idx]: push the table's address, swap so the index is on top, + * then load with EA scale = pointer size. */ + cfree_cg_push_symbol_addr(g, (CfreeCgSym)table_sym, 0); /* [idx, &table] */ + cfree_cg_swap(g); /* [&table, idx] */ memset(&acc, 0, sizeof acc); acc.type = void_ptr_ty; acc.align = (uint32_t)c->target.ptr_align; - cfree_cg_load(g, acc); /* [label_addr] */ + { + CfreeCgEffAddr ea; + ea.offset = 0; + ea.scale = (uint32_t)c->target.ptr_size; + cfree_cg_load(g, acc, ea); /* [label_addr] */ + } /* 7. Indirect branch with the full closed target set (every case + * default), so backends doing branch-target hardening (BTI/IBT/CFG) @@ -792,170 +797,6 @@ void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size, api_release(g, &dst); } -void cfree_cg_index(CfreeCg* g, uint64_t offset) { - ApiSValue idx, base; - CGTarget* T; - CfreeCgTypeId base_ty, base_ptr_ty, elem_ty, idx_ty; - const CgType* base_info; - u32 elemsz; - int free_base_op = 0; - Operand base_op, idx_op, result; - Reg rr; - if (!g) return; - if (offset > INT64_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: index offset too large"); - return; - } - T = g->target; - idx = api_pop(g); - base = api_pop(g); - api_ensure_reg(g, &base); - base_ty = api_sv_type(&base); - base_info = cg_type_get(g->c, base_ty); - if (base_info && base_info->kind == CFREE_CG_TYPE_PTR) { - elem_ty = base_info->ptr.pointee; - base_ptr_ty = base_ty; - } else if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY && - api_is_lvalue_sv(&base)) { - elem_ty = base_info->array.elem; - base_ptr_ty = cg_type_ptr_to(g->c, 
elem_ty); - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: index base is not a pointer or array lvalue"); - return; - } - elemsz = (u32)abi_cg_sizeof(g->c->abi, elem_ty); - idx_ty = idx.type ? idx.type : idx.op.type; - if (!idx_ty) idx_ty = builtin_id(CFREE_CG_BUILTIN_I32); - if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY) { - rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - base_op = api_op_reg(rr, base_ptr_ty); - T->addr_of(T, base_op, base.op); - api_release(g, &base); - free_base_op = 1; - } else { - base_op = api_force_reg(g, &base, base_ptr_ty); - } - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - result = api_op_reg(rr, base_ptr_ty); - if (idx_op.kind == OPK_IMM) { - i64 total_offset = idx_op.v.imm * (i64)elemsz + (i64)offset; - T->binop(T, BO_IADD, result, base_op, - api_op_imm(total_offset, base_ptr_ty)); - } else { - Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); - Operand scaled = api_op_reg(sr, idx_ty); - /* Allocating `scaled` can materialize a delayed index expression into a - * fresh virtual register under opt. Refresh idx_op so the multiply uses - * the materialized value, not the pre-materialization source operand. 
*/ - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - if (idx.op.kind == OPK_REG) idx_op = idx.op; - T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty)); - if (offset > 0) { - T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty)); - } - T->binop(T, BO_IADD, result, base_op, scaled); - api_free_reg(g, sr, RC_INT); - } - if (free_base_op) api_free_reg(g, base_op.v.reg, RC_INT); - if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) - api_release(g, &base); - api_release(g, &idx); - api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty)); -} - -void cfree_cg_field(CfreeCg* g, uint32_t field_index) { - ApiSValue base; - CGTarget* T; - CfreeCgTypeId rec_ty; - CfreeCgTypeId field_ty; - CfreeCgTypeId rec_ptr_ty; - const CgType* rec_info; - const ABIRecordLayout* layout; - u32 field_offset; - Operand result; - Reg rr; - if (!g) return; - T = g->target; - base = api_pop(g); - api_ensure_reg(g, &base); - rec_ty = api_sv_type(&base); - if (!api_is_lvalue_sv(&base)) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: field base is not an lvalue"); - return; - } - layout = abi_cg_record_layout(g->c->abi, rec_ty); - if (!layout || field_index >= layout->nfields) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid field index"); - return; - } - rec_info = cg_type_get(g->c, rec_ty); - if (!rec_info || rec_info->kind != CFREE_CG_TYPE_RECORD || - field_index >= rec_info->record.nfields) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid record base"); - return; - } - field_ty = rec_info->record.fields[field_index].type; - rec_ptr_ty = cg_type_ptr_to(g->c, rec_ty); - field_offset = layout->fields[field_index].offset; - if (layout->fields[field_index].bit_width != 0 || - (rec_info->record.fields[field_index].flags & CFREE_CG_FIELD_BITFIELD) != - 0) { - Operand base_addr; - ApiSValue sv; - BitFieldAccess bf; - if (layout->fields[field_index].bit_width == 0) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: zero-width 
bit-field access"); - api_release(g, &base); - return; - } - base_addr = api_lvalue_addr(g, &base, rec_ptr_ty); - memset(&bf, 0, sizeof bf); - bf.field_type = field_ty; - bf.storage = api_mem_for_lvalue(g, &base_addr, field_ty); - bf.storage.size = layout->fields[field_index].storage_size; - bf.storage_offset = layout->fields[field_index].offset; - bf.bit_offset = layout->fields[field_index].bit_offset; - bf.bit_width = layout->fields[field_index].bit_width; - bf.signed_ = rec_info->record.fields[field_index].bit_signed != 0; - sv = api_make_lv(base_addr, field_ty); - sv.bitfield_lvalue = 1; - sv.delayed.bitfield = bf; - api_release(g, &base); - api_push(g, sv); - return; - } - if (base.op.kind == OPK_GLOBAL) { - result = - api_op_global(base.op.v.global.sym, - base.op.v.global.addend + (i64)field_offset, field_ty); - api_push(g, api_make_lv(result, field_ty)); - } else if (base.op.kind == OPK_INDIRECT && field_offset <= (u32)INT32_MAX && - base.op.v.ind.ofs <= INT32_MAX - (i32)field_offset) { - result = api_op_indirect(base.op.v.ind.base, - base.op.v.ind.ofs + (i32)field_offset, field_ty); - api_push(g, api_make_lv(result, field_ty)); - } else { - Operand base_addr; - rr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - base_addr = api_op_reg(rr, rec_ptr_ty); - T->addr_of(T, base_addr, base.op); - api_release(g, &base); - if (field_offset == 0) { - result = base_addr; - } else { - Reg fr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - result = api_op_reg(fr, rec_ptr_ty); - T->binop(T, BO_IADD, result, base_addr, - api_op_imm((i64)field_offset, rec_ptr_ty)); - api_free_reg(g, base_addr.v.reg, RC_INT); - } - api_push(g, - api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty)); - } -} - /* ============================================================ * Calls / return * ============================================================ */ diff --git a/src/cg/data.c b/src/cg/data.c @@ -615,8 +615,8 @@ void cfree_cg_data_end(CfreeCg* g) { * time once the 
wrapped backend's func_begin has set cur_func_sym. The * helper does not register the symbol with CfreeCg's sym table; * callers wire its CfreeCg type via api_remember_sym so subsequent - * cfree_cg_push_symbol_lvalue / cfree_cg_index can address it as an - * array of pointers. */ + * cfree_cg_push_symbol_addr + load(scale=ptr_size) can address it as + * an array of pointers. */ ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n) { Compiler* c; ObjBuilder* ob; diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -296,8 +296,6 @@ void cfree_cg_memmove(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, CfreeCgMemAccess src_access); void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size, CfreeCgMemAccess dst_access); -void cfree_cg_index(CfreeCg* g, uint64_t offset); -void cfree_cg_field(CfreeCg* g, uint32_t field_index); void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym, CfreeCgDataDefAttrs attrs); void cfree_cg_data_common(CfreeCg* g, CfreeCgSym cg_sym, uint64_t size, @@ -321,7 +319,6 @@ void cfree_cg_data_symdiff(CfreeCg* g, CfreeCgSym lhs, CfreeCgSym rhs, void cfree_cg_data_end(CfreeCg* g); ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n); DebugTypeId api_debug_type(CfreeCg* g, CfreeCgTypeId id); -int api_source_flags_addr_taken(u32 flags); int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty, CfreeCgLocalAttrs attrs); CfreeCgLocal api_local_handle(u32 index); @@ -345,14 +342,11 @@ void api_push_source_reg_lvalue(CfreeCg* g, CfreeCgLocal local, Reg reg, void cfree_cg_push_local(CfreeCg* g, CfreeCgLocal local); void cfree_cg_push_local_addr(CfreeCg* g, CfreeCgLocal local); void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend); -void cfree_cg_push_symbol_lvalue(CfreeCg* g, CfreeCgSym sym, int64_t addend); -void cfree_cg_addr_offset(CfreeCg* g, int64_t byte_offset, - CfreeCgTypeId result_type); -void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access); -void cfree_cg_indirect(CfreeCg* 
g); +void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea); void cfree_cg_addr(CfreeCg* g); -void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access); +void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea); void cfree_cg_dup(CfreeCg* g); +void cfree_cg_dup2(CfreeCg* g); void cfree_cg_swap(CfreeCg* g); void cfree_cg_drop(CfreeCg* g); int cfree_cg_top_const_int(CfreeCg* g, int64_t* out_value); @@ -389,6 +383,8 @@ Operand api_op_reg(Reg r, CfreeCgTypeId ty); Operand api_op_local(FrameSlot s, CfreeCgTypeId ty); Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty); Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty); +Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs, + CfreeCgTypeId ty); u8 api_residency_for(const Operand* o); ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty); ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty); diff --git a/src/cg/local.c b/src/cg/local.c @@ -1,12 +1,12 @@ #include "cg/internal.h" -int api_source_flags_addr_taken(u32 flags) { - return (flags & CFREE_CG_LOCAL_ADDRESS_TAKEN) != 0; -} - int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty, CfreeCgLocalAttrs attrs) { - if (api_source_flags_addr_taken(attrs.flags)) return 1; + (void)attrs; + /* Aggregates (records, arrays), wide16 (f128/i128), vararg state, and any + * non-scalar type must live in memory. All scalar locals are placed on the + * frame at CG time; opt is responsible for promoting non-escaped scalars + * back into PRegs. */ if (api_is_wide16_scalar_type(g->c, ty)) return 1; return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) || cg_type_is_ptr(g->c, ty)); @@ -83,8 +83,6 @@ CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type, desc.loc = g->cur_loc; desc.size = abi_cg_sizeof(g->c->abi, type); desc.align = attrs.align ? 
attrs.align : abi_cg_alignof(g->c->abi, type); - if (api_source_flags_addr_taken(attrs.flags)) - desc.flags |= CG_LOCAL_ADDR_TAKEN; if (api_local_requires_memory(g, ty, attrs)) desc.flags |= CG_LOCAL_MEMORY_REQUIRED; if (g->target->local) @@ -134,7 +132,6 @@ CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, CfreeCgTypeId type, pd.type = ty; pd.size = size; pd.align = align; - if (api_source_flags_addr_taken(attrs.flags)) pd.flags |= CG_LOCAL_ADDR_TAKEN; if (api_local_requires_memory(g, ty, attrs)) pd.flags |= CG_LOCAL_MEMORY_REQUIRED; if (g->fn_abi && index < g->fn_abi->nparams) { diff --git a/src/cg/memory.c b/src/cg/memory.c @@ -146,170 +146,458 @@ void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend) { } } -void cfree_cg_push_symbol_lvalue(CfreeCg* g, CfreeCgSym sym, int64_t addend) { - CfreeCgTypeId ty; - if (!g) return; - ty = api_sym_type(g, sym); - if (!ty) return; - if (api_sym_is_tls(g, sym)) { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - Operand dst = api_op_reg(r, ptr_ty); - g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend); - api_push(g, api_make_lv(api_op_indirect(r, 0, ty), ty)); - } else { - api_push(g, api_make_lv(api_op_global((ObjSymId)sym, addend, ty), ty)); +/* ============================================================ + * Load / addr / store + * + * The memops dispatch on the TOS shape of `base`: + * 1. lvalue base, no index -> fold ea.offset into the operand encoding + * (OPK_LOCAL / OPK_GLOBAL / OPK_INDIRECT) and emit a single memop. + * 2. lvalue base, scaled index -> take the lvalue's address, then form an + * indexed OPK_INDIRECT and emit a single memop. + * 3. pointer-rvalue base, no index -> for OPK_GLOBAL fold the offset into + * the addend; otherwise use [reg + offset]. + * 4. pointer-rvalue base, scaled index -> form OPK_INDIRECT against the + * pointer register with index/scale. 
+ * + * Scale normalization (ea.scale != 0): + * {1,2,4,8} -> log2_scale 0..3 on OPK_INDIRECT. + * otherwise -> compute index*scale into a fresh register, fold into base, + * dispatch with index = REG_NONE. + * ============================================================ */ + +/* log2 of a {1,2,4,8} scale, else -1. */ +static int scale_to_log2(uint32_t scale) { + switch (scale) { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + default: + return -1; } } -void cfree_cg_addr_offset(CfreeCg* g, int64_t byte_offset, - CfreeCgTypeId result_type) { - ApiSValue v; - CfreeCgTypeId rty; - CfreeCgTypeId ptr_ty; - Operand base; - Operand result; - Reg rr; - int want_ptr; - int base_is_lvalue; - int free_base = 0; - if (!g) return; - rty = resolve_type(g->c, result_type); - if (!rty) return; - v = api_pop(g); - want_ptr = cg_type_is_ptr(g->c, rty); - base_is_lvalue = api_is_lvalue_sv(&v); - if (v.source_local != CFREE_CG_LOCAL_NONE) - api_local_const_clear(api_local_from_handle(g, v.source_local)); - api_ensure_reg(g, &v); - if (v.op.kind == OPK_GLOBAL) { - result = api_op_global(v.op.v.global.sym, - v.op.v.global.addend + byte_offset, rty); - api_push(g, want_ptr ? api_make_sv(result, rty) : api_make_lv(result, rty)); - return; +/* Materialize the EA into an OPK_INDIRECT operand suitable for the backend + * load/store. `addr` may be OPK_LOCAL, OPK_GLOBAL, or OPK_INDIRECT. The + * `offset` is folded into the operand; the indexed form is encoded directly. + * If the index path requires arithmetic on a global/local, the address is + * first lowered into a register via T->addr_of and then combined. + * + * The caller owns `index_reg` (REG_NONE if no index). On return, + * *out_owned_base is set to the register that the caller must free after the + * memop completes (REG_NONE if no new register was allocated). The returned + * operand's index register is freed by the caller separately when applicable; + * this helper does not free it. 
+ */ +static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, + Reg index, u8 log2_scale, + CfreeCgTypeId access_ty, + Reg* out_owned_base) { + CGTarget* T = g->target; + CfreeCgTypeId base_ty = + cg_type_is_ptr(g->c, addr.type) ? addr.type + : cg_type_ptr_to(g->c, access_ty); + *out_owned_base = REG_NONE; + + if (index == REG_NONE) { + /* No index: fold offset into the operand directly. */ + if (addr.kind == OPK_LOCAL) { + Operand r; + if (offset == 0) { + r = addr; + r.type = access_ty; + return r; + } + /* Local frame slots have no native displacement encoding in OPK_LOCAL; + * materialize the base address into a register and apply the offset. */ + { + Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); + Operand base_reg = api_op_reg(br, base_ty); + T->addr_of(T, base_reg, addr); + if (offset >= INT32_MIN && offset <= INT32_MAX) { + *out_owned_base = br; + return api_op_indirect(br, (i32)offset, access_ty); + } + T->binop(T, BO_IADD, base_reg, base_reg, + api_op_imm(offset, base_ty)); + *out_owned_base = br; + return api_op_indirect(br, 0, access_ty); + } + } + if (addr.kind == OPK_GLOBAL) { + Operand r = api_op_global(addr.v.global.sym, addr.v.global.addend + offset, + access_ty); + return r; + } + if (addr.kind == OPK_INDIRECT) { + i64 sum = (i64)addr.v.ind.ofs + offset; + if (sum >= INT32_MIN && sum <= INT32_MAX) { + return api_op_indirect_indexed(addr.v.ind.base, addr.v.ind.index, + addr.v.ind.log2_scale, (i32)sum, + access_ty); + } + /* Offset too large for i32 displacement; materialize. 
*/ + { + Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); + Operand base_reg = api_op_reg(br, base_ty); + T->copy(T, base_reg, api_op_reg(addr.v.ind.base, base_ty)); + T->binop(T, BO_IADD, base_reg, base_reg, + api_op_imm(offset, base_ty)); + *out_owned_base = br; + return api_op_indirect_indexed(br, addr.v.ind.index, + addr.v.ind.log2_scale, + addr.v.ind.ofs, access_ty); + } + } + /* OPK_REG (pointer rvalue) */ + if (offset >= INT32_MIN && offset <= INT32_MAX) { + return api_op_indirect(addr.v.reg, (i32)offset, access_ty); + } + { + Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); + Operand base_reg = api_op_reg(br, base_ty); + T->binop(T, BO_IADD, base_reg, api_op_reg(addr.v.reg, base_ty), + api_op_imm(offset, base_ty)); + *out_owned_base = br; + return api_op_indirect(br, 0, access_ty); + } } - if (!want_ptr && v.op.kind == OPK_INDIRECT) { - i64 ofs = (i64)v.op.v.ind.ofs + byte_offset; - if (ofs >= INT32_MIN && ofs <= INT32_MAX) { - result = api_op_indirect(v.op.v.ind.base, (i32)ofs, rty); - api_push(g, api_make_lv(result, rty)); - return; + + /* Indexed form. addr must be reduced to a base register first when it is + * not already an OPK_INDIRECT with a free index slot. */ + if (addr.kind == OPK_INDIRECT && addr.v.ind.index == REG_NONE && + offset == 0) { + /* Reuse existing INDIRECT base; add index and scale. The displacement + * stays whatever the operand already had. */ + return api_op_indirect_indexed(addr.v.ind.base, index, log2_scale, + addr.v.ind.ofs, access_ty); + } + if (addr.kind == OPK_INDIRECT && addr.v.ind.index == REG_NONE) { + i64 sum = (i64)addr.v.ind.ofs + offset; + if (sum >= INT32_MIN && sum <= INT32_MAX) { + return api_op_indirect_indexed(addr.v.ind.base, index, log2_scale, + (i32)sum, access_ty); } } - ptr_ty = want_ptr ? 
rty : cg_type_ptr_to(g->c, rty); - if (!base_is_lvalue && cg_type_is_ptr(g->c, api_sv_type(&v))) - ptr_ty = api_sv_type(&v); - if (base_is_lvalue) { - base = api_lvalue_addr(g, &v, ptr_ty); - free_base = 1; - } else { - base = api_force_reg(g, &v, ptr_ty); + /* Otherwise, materialize addr into a register and then build the indexed + * operand around it. */ + { + Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); + Operand base_reg = api_op_reg(br, base_ty); + if (addr.kind == OPK_REG) { + T->copy(T, base_reg, api_op_reg(addr.v.reg, base_ty)); + } else { + T->addr_of(T, base_reg, addr); + } + if (offset != 0) { + if (offset >= INT32_MIN && offset <= INT32_MAX) { + *out_owned_base = br; + return api_op_indirect_indexed(br, index, log2_scale, (i32)offset, + access_ty); + } + T->binop(T, BO_IADD, base_reg, base_reg, api_op_imm(offset, base_ty)); + } + *out_owned_base = br; + return api_op_indirect_indexed(br, index, log2_scale, 0, access_ty); } - rr = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - result = api_op_reg(rr, ptr_ty); - g->target->binop(g->target, BO_IADD, result, base, - api_op_imm(byte_offset, ptr_ty)); - if (free_base) api_free_reg(g, base.v.reg, RC_INT); - api_release(g, &v); - if (want_ptr) { - result.type = rty; - api_push(g, api_make_sv(result, rty)); +} + +/* Pop the index operand for a scaled-index memop. Returns the index in a + * freshly allocated register that the caller owns and must free after the + * memop. Handles the scale-not-in-{1,2,4,8} case by computing index*scale. + * + * On return: + * *out_log2 = log2_scale (0..3) if scale was normalized to one of {1,2,4,8} + * or to 0 if we materialized the scaled value (log2=0). 
+ */ +static Reg pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) { + ApiSValue idx; + CfreeCgTypeId idx_ty; + int lg2; + Operand idx_op; + CGTarget* T = g->target; + Reg sr; + Operand scaled; + + idx = api_pop(g); + idx_ty = api_sv_type(&idx); + if (!idx_ty) idx_ty = builtin_id(CFREE_CG_BUILTIN_I64); + + lg2 = scale_to_log2(scale); + if (lg2 >= 0) { + *out_log2 = (u8)lg2; + /* Always allocate a fresh register so the caller has unambiguous + * ownership; copy the index value in. */ + idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); + sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); + scaled = api_op_reg(sr, idx_ty); + if (idx_op.kind == OPK_IMM) { + T->load_imm(T, scaled, idx_op.v.imm); + } else { + /* Re-fetch in case alloc materialized a delayed expression. */ + idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); + if (idx.op.kind == OPK_REG) idx_op = idx.op; + T->copy(T, scaled, idx_op); + } + api_release(g, &idx); + return sr; + } + + /* Non-power-of-two scale: materialize index*scale into a fresh register. */ + idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); + sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); + scaled = api_op_reg(sr, idx_ty); + if (idx_op.kind == OPK_IMM) { + T->load_imm(T, scaled, idx_op.v.imm * (i64)scale); } else { - api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, rty), rty)); + idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); + if (idx.op.kind == OPK_REG) idx_op = idx.op; + T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)scale, idx_ty)); } + api_release(g, &idx); + *out_log2 = 0; + return sr; } -/* ============================================================ - * Load / addr / store - * ============================================================ */ +/* Build a BitFieldAccess descriptor from the CfreeCgMemAccess metadata. 
*/ +static BitFieldAccess bf_from_access(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgTypeId field_ty, + const Operand* addr) { + BitFieldAccess bf; + memset(&bf, 0, sizeof bf); + bf.field_type = field_ty; + bf.storage = api_mem_for_lvalue(g, addr, field_ty); + if (access.storage_size) bf.storage.size = access.storage_size; + bf.storage_offset = 0; + bf.bit_offset = access.bit_offset; + bf.bit_width = access.bit_width; + bf.signed_ = access.bit_signed ? 1 : 0; + return bf; +} -void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access) { - ApiSValue v; +/* Pop the base for a memop; populate `*base_addr` with an operand the backend + * can consume (LOCAL/GLOBAL/INDIRECT for lvalue forms, or REG holding a + * pointer for rvalue forms). Returns 1 if `base` is an lvalue, 0 otherwise. + * + * Sets `*source_local_out` to the lvalue's source_local handle when applicable + * (so the caller can update constant tracking). Sets `*lvalue_sv` to a copy of + * the popped lvalue so the caller can call api_release on it after the memop; + * for rvalue-pointer forms, `*lvalue_sv` is the popped value (used for + * release). 
+ */ + +void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { + ApiSValue base; + CGTarget* T; CfreeCgTypeId ty; + CfreeCgTypeId access_ty; + Reg owned_base = REG_NONE; + Reg owned_index = REG_NONE; + u8 log2_scale = 0; + Operand mem_op; + Reg dst_r; Operand dst; + int is_lvalue; + int is_bitfield; + int has_index; if (!g) return; + T = g->target; if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g); - v = api_pop(g); - if (!api_is_lvalue_sv(&v)) { - api_push(g, v); - return; - } - ty = api_mem_access_type(g, access, api_sv_type(&v), "load"); - if (v.bitfield_lvalue) { - CfreeCgTypeId load_ty = ty; - Reg rr; - api_require_scalar_mem_type(g, "load", load_ty); - rr = api_alloc_reg_or_spill(g, RC_INT, load_ty); - dst = api_op_reg(rr, load_ty); - g->target->bitfield_load(g->target, dst, v.op, v.delayed.bitfield); - api_release(g, &v); - api_push(g, api_make_sv(dst, load_ty)); - return; + + has_index = (ea.scale != 0); + is_bitfield = (access.bit_width != 0); + + if (has_index) { + /* Pop and normalize the index first; it sits between base and any + * follow-up value (none for load). */ + owned_index = pop_and_normalize_index(g, ea.scale, &log2_scale); } - if (cg_type_is_aggregate(g->c, api_sv_type(&v))) { - u32 access_size; - u32 lvalue_size; - if (!cg_type_is_aggregate(g->c, ty)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: load scalar access from aggregate lvalue " - "requires selecting a field"); + + base = api_pop(g); + is_lvalue = api_is_lvalue_sv(&base); + + /* Aggregate / non-EA fast paths only apply to the no-index, no-bitfield + * case where the result is the lvalue itself (matches old behavior). + * Scalar accesses at an offset into an aggregate lvalue are the canonical + * field-access pattern under the EA model and fall through to the normal + * scalar load path below. 
*/ + if (!has_index && !is_bitfield && is_lvalue && ea.offset == 0 && + cg_type_is_aggregate(g->c, api_sv_type(&base))) { + ty = api_mem_access_type(g, access, api_sv_type(&base), "load"); + if (cg_type_is_aggregate(g->c, ty)) { + u32 access_size = api_mem_type_size(g, ty, "load"); + u32 lvalue_size = api_mem_type_size(g, api_sv_type(&base), "load"); + if (access_size != lvalue_size) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: load aggregate type/size mismatch: access " + "size %u, lvalue size %u", + (unsigned)access_size, (unsigned)lvalue_size); + } + api_push(g, base); + return; } - access_size = api_mem_type_size(g, ty, "load"); - lvalue_size = api_mem_type_size(g, api_sv_type(&v), "load"); - if (access_size != lvalue_size) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: load aggregate type/size mismatch: access size " - "%u, lvalue size %u", - (unsigned)access_size, (unsigned)lvalue_size); + /* Scalar access from aggregate lvalue: fall through. */ + } + + /* From here on, base must reduce to something we can address. */ + if (!is_lvalue && cg_type_is_aggregate(g->c, api_sv_type(&base))) { + /* Pointer rvalue to aggregate without any EA -- return as-is. 
*/ + if (!has_index && !is_bitfield && ea.offset == 0) { + api_push(g, base); + return; } - api_push(g, v); - return; } - api_require_scalar_mem_type(g, "load", ty); - if (api_is_wide16_scalar_type(g->c, ty)) { - v.type = ty; - v.op.type = ty; - api_push(g, v); + + ty = api_mem_access_type(g, access, api_sv_type(&base), "load"); + access_ty = ty; + + if (!has_index && !is_bitfield && !is_lvalue && + base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL && + (cg_type_is_aggregate(g->c, ty) || + api_is_wide16_scalar_type(g->c, ty))) { + base.type = ty; + base.op.type = ty; + base.lvalue = 1; + api_push(g, base); return; } - if (v.source_local != CFREE_CG_LOCAL_NONE && - api_local_const_load(g, v.source_local, access, &dst)) { - api_release(g, &v); + + if (!is_bitfield) api_require_scalar_mem_type(g, "load", access_ty); + + /* Source-local constant load (only the plain, no-EA case is tracked). */ + if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue && + base.source_local != CFREE_CG_LOCAL_NONE && + api_local_const_load(g, base.source_local, access, &dst)) { + api_release(g, &base); api_push(g, api_make_sv(dst, dst.type)); return; } - api_ensure_reg(g, &v); - if (v.source_local != CFREE_CG_LOCAL_NONE && v.op.kind == OPK_REG) { - dst = v.op; - dst.type = ty; - v.op = dst; - v.type = ty; - v.lvalue = 0; - v.res = RES_FIXED_REG; - api_push(g, v); + + /* Source-local register lvalue (param in a hard reg): plain no-EA load + * returns the register value directly. 
*/ + if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue && + base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_REG) { + Operand val = base.op; + val.type = ty; + base.op = val; + base.type = ty; + base.lvalue = 0; + base.res = RES_FIXED_REG; + api_push(g, base); return; } - dst = api_force_reg(g, &v, ty); - dst.type = ty; - api_push(g, api_make_sv(dst, ty)); -} -void cfree_cg_indirect(CfreeCg* g) { - ApiSValue ptr; - CfreeCgTypeId pty; - CfreeCgTypeId pointee; - Operand ptr_op; - if (!g) return; - ptr = api_pop(g); - pty = api_sv_type(&ptr); - pointee = cg_type_pointee(g->c, pty); - if (!pointee || cg_type_is_void(g->c, pointee)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: indirect operand is not a pointer to object"); + /* Wide-16 scalar lvalue load: keep the addressable storage as the value. + * For fields at a fixed offset, fold the EA into a new lvalue operand instead + * of asking the backend for a single 16-byte register load. */ + if (!has_index && !is_bitfield && is_lvalue && + api_is_wide16_scalar_type(g->c, ty)) { + if (ea.offset == 0) { + base.type = ty; + base.op.type = ty; + api_push(g, base); + return; + } + if (!api_operand_can_address(&base.op)) { + CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); + Operand addr = api_lvalue_addr(g, &base, pty); + mem_op = fold_ea_into_operand(g, addr, ea.offset, REG_NONE, 0, ty, + &owned_base); + if (owned_base == REG_NONE) owned_base = addr.v.reg; + else if (owned_base != addr.v.reg) api_free_reg(g, addr.v.reg, RC_INT); + } else { + mem_op = fold_ea_into_operand(g, base.op, ea.offset, REG_NONE, 0, ty, + &owned_base); + } + if (mem_op.kind == OPK_INDIRECT && owned_base == REG_NONE && + base.op.kind == OPK_INDIRECT) { + base.res = RES_INHERENT; + } + api_release(g, &base); + api_push(g, api_make_lv(mem_op, ty)); return; } - ptr_op = api_force_reg(g, &ptr, pty); - api_push(g, api_make_lv(api_op_indirect(ptr_op.v.reg, 0, pointee), pointee)); + + /* Compute the memop operand. 
Lvalue bases preserve named-storage operands; + * pointer rvalues use the register holding the address. */ + if (is_lvalue) { + if (!api_operand_can_address(&base.op)) { + /* Source-local in a hard register but we need to compute an EA: take + * the lvalue's address first. */ + CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); + Operand addr = api_lvalue_addr(g, &base, pty); + mem_op = fold_ea_into_operand(g, addr, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); + /* `addr` is an owned register from api_lvalue_addr. */ + if (owned_base == REG_NONE) owned_base = addr.v.reg; + else if (owned_base != addr.v.reg) api_free_reg(g, addr.v.reg, RC_INT); + } else { + /* The lvalue carries its own operand; fold the EA into it. */ + mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); + } + } else if (base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL) { + /* Pointer-rvalue OPK_GLOBAL: fold the EA directly against the global + * (matching the lvalue OPK_GLOBAL path) so the backend can emit a single + * PC-relative or absolute access. */ + mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); + } else { + /* Pointer rvalue: ensure the address is in a register and treat that as + * the base. */ + CfreeCgTypeId pty = api_sv_type(&base); + Operand ptr_op = api_force_reg(g, &base, pty); + mem_op = fold_ea_into_operand(g, ptr_op, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); + } + + /* Mutate source-local tracking. Any EA-shaped load through a tracked local + * (offset != 0 or has_index or non-matching access) cannot use the cached + * scalar value: clear it. 
*/ + if (is_lvalue && base.source_local != CFREE_CG_LOCAL_NONE) { + api_local_const_clear(api_local_from_handle(g, base.source_local)); + } + + if (is_bitfield) { + BitFieldAccess bf = bf_from_access(g, access, access_ty, &mem_op); + Reg rr = api_alloc_reg_or_spill(g, RC_INT, access_ty); + dst = api_op_reg(rr, access_ty); + T->bitfield_load(T, dst, mem_op, bf); + } else { + dst_r = api_alloc_reg_or_spill(g, api_type_class(access_ty), access_ty); + dst = api_op_reg(dst_r, access_ty); + T->load(T, dst, mem_op, api_mem_from_access(g, &mem_op, access)); + } + + /* Release the base lvalue/rvalue and any owned registers. */ + if (is_lvalue) { + /* If the original lvalue's operand was OPK_INDIRECT, its base register + * was owned by the lvalue and is still in mem_op.v.ind.base when we did + * not allocate a new owned_base. Free that base when no new owned_base + * shadows it. */ + if (base.op.kind == OPK_INDIRECT && owned_base == REG_NONE) { + /* mem_op uses the same base register as base.op; free it via the + * lvalue release. */ + } + api_release(g, &base); + } else { + /* For rvalue-pointer bases, the register holding the pointer was the + * owned reg of `base`; api_release will free it unless the EA folding + * already absorbed it into mem_op. The fold_ea_into_operand path for + * REG returns either OPK_INDIRECT(addr.v.reg, ofs) (no new owned_base) + * or a freshly allocated owned_base. In either case api_release(&base) + * frees the pointer register; that is fine because we already issued + * the memop. 
*/ + api_release(g, &base); + } + if (owned_base != REG_NONE) api_free_reg(g, owned_base, RC_INT); + if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT); + + api_push(g, api_make_sv(dst, access_ty)); } void cfree_cg_addr(CfreeCg* g) { @@ -318,11 +606,6 @@ void cfree_cg_addr(CfreeCg* g) { Operand dst; if (!g) return; v = api_pop(g); - if (v.bitfield_lvalue) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: cannot take address of bit-field"); - return; - } pty = cg_type_ptr_to(g->c, api_sv_type(&v)); if (v.source_local != CFREE_CG_LOCAL_NONE) api_local_const_address_taken(g, v.source_local); @@ -331,72 +614,82 @@ void cfree_cg_addr(CfreeCg* g) { api_push(g, api_make_sv(dst, pty)); } -void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) { - ApiSValue lv, rv; +void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { + ApiSValue base, rv; CGTarget* T; CfreeCgTypeId ty; + CfreeCgTypeId access_ty; Operand src; + Reg owned_base = REG_NONE; + Reg owned_index = REG_NONE; + u8 log2_scale = 0; + Operand mem_op; + int is_lvalue; + int is_bitfield; + int has_index; int scalar_aggregate_store = 0; if (!g) return; - if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g); T = g->target; + if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g); + + has_index = (ea.scale != 0); + is_bitfield = (access.bit_width != 0); + + /* Stack: + * no index: [base, value] - pop value, then index (none), then base + * indexed: [base, index, value] - pop value, then index, then base + */ rv = api_pop(g); - lv = api_pop(g); - if (!api_is_lvalue_sv(&lv)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: store destination is not an lvalue"); - return; + if (has_index) { + owned_index = pop_and_normalize_index(g, ea.scale, &log2_scale); } - ty = api_mem_access_type(g, access, api_sv_type(&lv), "store"); - if (lv.bitfield_lvalue) { - api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); - if (lv.op.kind == 
OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - if (api_sv_op_is_reg_or_imm(&rv)) { - src = rv.op; - } else { - src = api_force_reg(g, &rv, api_sv_type(&rv)); - } - T->bitfield_store(T, lv.op, src, lv.delayed.bitfield); - api_release(g, &lv); - api_release(g, &rv); - return; - } - if (cg_type_is_aggregate(g->c, api_sv_type(&lv)) && - !cg_type_is_aggregate(g->c, api_sv_type(&rv)) && - !cg_type_is_aggregate(g->c, ty)) { - u32 access_size = api_mem_type_size(g, ty, "store"); - u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store"); - u32 value_size = api_mem_type_size(g, api_sv_type(&rv), "store"); - if (access_size != dst_size || value_size != dst_size) { + base = api_pop(g); + is_lvalue = api_is_lvalue_sv(&base); + + if (!is_lvalue) { + /* The "destination is not an lvalue" diagnostic now only fires when the + * popped base is neither an lvalue nor a pointer-typed rvalue. */ + if (!cg_type_is_ptr(g->c, api_sv_type(&base))) { compiler_panic(g->c, g->cur_loc, - "CfreeCg: store scalar/aggregate size mismatch: access " - "size %u, destination size %u, value size %u", - (unsigned)access_size, (unsigned)dst_size, - (unsigned)value_size); + "CfreeCg: store destination is not an lvalue or pointer"); + return; } - scalar_aggregate_store = 1; } - if (!scalar_aggregate_store && + + ty = api_mem_access_type(g, access, api_sv_type(&base), "store"); + access_ty = ty; + + /* Aggregate store (no EA): memcpy through src lvalue. Only triggers when + * the access type itself is aggregate. Scalar stores at an offset into an + * aggregate lvalue are field-stores under the EA model and fall through to + * the scalar store path. 
*/ + (void)scalar_aggregate_store; + if (!has_index && !is_bitfield && ea.offset == 0 && (cg_type_is_aggregate(g->c, ty) || - cg_type_is_aggregate(g->c, api_sv_type(&lv)) || cg_type_is_aggregate(g->c, api_sv_type(&rv)))) { CfreeCgTypeId ptr_ty; Operand dst_addr, src_addr; + int dst_addr_owned; + int src_addr_owned; + int src_ptr_rvalue; AggregateAccess agg; - u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store"); - u32 src_size = api_mem_type_size(g, api_sv_type(&rv), "store"); + u32 src_size; + u32 dst_size = cg_type_is_aggregate(g->c, ty) + ? api_mem_type_size(g, ty, "store") + : api_mem_type_size(g, api_sv_type(&base), "store"); u32 access_size = cg_type_is_aggregate(g->c, ty) ? api_mem_type_size(g, ty, "store") : dst_size; - if (!api_is_lvalue_sv(&rv)) { + src_ptr_rvalue = + !api_is_lvalue_sv(&rv) && cg_type_is_ptr(g->c, api_sv_type(&rv)); + src_size = src_ptr_rvalue ? access_size + : api_mem_type_size(g, api_sv_type(&rv), "store"); + if (!api_is_lvalue_sv(&rv) && !src_ptr_rvalue) { compiler_panic(g->c, g->cur_loc, "CfreeCg: aggregate store source is not an lvalue"); } - if (!cg_type_is_aggregate(g->c, api_sv_type(&lv)) || - !cg_type_is_aggregate(g->c, api_sv_type(&rv)) || + if ((is_lvalue && !cg_type_is_aggregate(g->c, api_sv_type(&base))) || + (!src_ptr_rvalue && !cg_type_is_aggregate(g->c, api_sv_type(&rv))) || access_size != dst_size || access_size != src_size) { compiler_panic(g->c, g->cur_loc, "CfreeCg: store aggregate type/size mismatch: access " @@ -404,45 +697,87 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) { (unsigned)access_size, (unsigned)dst_size, (unsigned)src_size); } - if (lv.source_local != CFREE_CG_LOCAL_NONE) { - api_local_const_clear(api_local_from_handle(g, lv.source_local)); - } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + if (base.source_local != CFREE_CG_LOCAL_NONE) { + api_local_const_clear(api_local_from_handle(g, base.source_local)); + } else if (base.op.kind == OPK_INDIRECT || 
base.op.kind == OPK_GLOBAL || (access.flags & CFREE_CG_MEM_VOLATILE)) { api_local_const_memory_boundary(g); } - ptr_ty = cg_type_ptr_to(g->c, api_sv_type(&lv)); - dst_addr = api_lvalue_addr(g, &lv, ptr_ty); - src_addr = api_lvalue_addr(g, &rv, ptr_ty); + ptr_ty = cg_type_ptr_to(g->c, ty); + if (is_lvalue) { + dst_addr = api_lvalue_addr(g, &base, ptr_ty); + dst_addr_owned = 1; + } else { + dst_addr = api_force_reg(g, &base, api_sv_type(&base)); + dst_addr_owned = 0; + } + if (src_ptr_rvalue) { + src_addr = api_force_reg(g, &rv, api_sv_type(&rv)); + src_addr_owned = 0; + } else { + src_addr = api_lvalue_addr(g, &rv, ptr_ty); + src_addr_owned = 1; + } memset(&agg, 0, sizeof agg); agg.size = access_size; agg.align = access.align ? access.align - : abi_cg_alignof(g->c->abi, api_sv_type(&lv)); + : abi_cg_alignof(g->c->abi, ty); T->copy_bytes(T, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - api_release(g, &lv); + if (dst_addr_owned) api_free_reg(g, dst_addr.v.reg, RC_INT); + if (src_addr_owned) api_free_reg(g, src_addr.v.reg, RC_INT); + api_release(g, &base); api_release(g, &rv); return; } - api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); - if (api_is_wide16_scalar_type(g->c, ty)) { - if (lv.source_local != CFREE_CG_LOCAL_NONE) { - api_local_const_clear(api_local_from_handle(g, lv.source_local)); - } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + + if (!is_bitfield) api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); + + /* Wide-16 scalar store: keep the pre-existing wide16 lowering for the plain + * (no-EA) case. 
*/ + if (!has_index && !is_bitfield && api_is_wide16_scalar_type(g->c, ty)) { + if (base.source_local != CFREE_CG_LOCAL_NONE) { + api_local_const_clear(api_local_from_handle(g, base.source_local)); + } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL || (access.flags & CFREE_CG_MEM_VOLATILE)) { api_local_const_memory_boundary(g); } if (api_is_lvalue_sv(&rv)) { CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); - Operand src_addr = api_lvalue_addr(g, &rv, ptr_ty); + Operand dst_addr; + Operand src_addr; + int dst_addr_owned = 0; + int src_addr_owned = 0; AggregateAccess agg; + if (is_lvalue && base.op.kind == OPK_LOCAL) { + if (ea.offset == 0) { + dst_addr = base.op; + } else { + dst_addr = fold_ea_into_operand(g, base.op, ea.offset, REG_NONE, 0, + ty, &owned_base); + dst_addr_owned = owned_base != REG_NONE; + } + } else if (is_lvalue) { + dst_addr = api_lvalue_addr(g, &base, ptr_ty); + dst_addr_owned = 1; + } else { + dst_addr = api_force_reg(g, &base, api_sv_type(&base)); + } + if (rv.op.kind == OPK_LOCAL) { + src_addr = rv.op; + } else { + src_addr = api_lvalue_addr(g, &rv, ptr_ty); + src_addr_owned = 1; + } memset(&agg, 0, sizeof agg); agg.size = 16; agg.align = access.align ? access.align : 16; T->copy_bytes(T, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); + if (dst_addr_owned) { + api_free_reg(g, dst_addr.kind == OPK_INDIRECT ? 
dst_addr.v.ind.base + : dst_addr.v.reg, + RC_INT); + } + if (src_addr_owned) api_free_reg(g, src_addr.v.reg, RC_INT); } else if (rv.op.kind == OPK_IMM) { u8 bytes[16]; u64 lo = (u64)rv.op.v.imm; @@ -454,65 +789,125 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) { bytes[lo_idx] = (u8)(lo >> (i * 8u)); bytes[hi_idx] = (u8)(hi >> (i * 8u)); } - if (lv.op.kind == OPK_LOCAL) { - api_store_f128_bytes(g, lv.op.v.frame_slot, ty, bytes); + if (base.op.kind == OPK_LOCAL) { + api_store_f128_bytes(g, base.op.v.frame_slot, ty, bytes); } else { FrameSlot slot = api_f128_temp_slot(g, ty); ApiSValue tmp = api_make_lv(api_op_local(slot, ty), ty); CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); + Operand dst_addr; Operand src_addr; + int dst_addr_owned = 0; AggregateAccess agg; api_store_f128_bytes(g, slot, ty, bytes); + if (is_lvalue) { + dst_addr = api_lvalue_addr(g, &base, ptr_ty); + dst_addr_owned = 1; + } else { + dst_addr = api_force_reg(g, &base, api_sv_type(&base)); + } src_addr = api_lvalue_addr(g, &tmp, ptr_ty); memset(&agg, 0, sizeof agg); agg.size = 16; agg.align = access.align ? access.align : 16; T->copy_bytes(T, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); + if (dst_addr_owned) api_free_reg(g, dst_addr.v.reg, RC_INT); api_free_reg(g, src_addr.v.reg, RC_INT); } } else { src = api_force_reg(g, &rv, ty); - T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); + T->store(T, base.op, src, api_mem_from_access(g, &base.op, access)); } - api_release(g, &lv); + api_release(g, &base); api_release(g, &rv); return; } - api_ensure_reg(g, &lv); + + /* General EA-shaped scalar / bit-field store. Resolve the EA into a + * single operand the backend can consume. */ + + /* Compute the source operand first so its register lifetime doesn't + * overlap any EA-arith we issue. 
*/ api_ensure_reg(g, &rv); if (api_sv_op_is_reg_or_imm(&rv)) { src = rv.op; } else { src = api_force_reg(g, &rv, api_sv_type(&rv)); } - if (lv.source_local != CFREE_CG_LOCAL_NONE) { - if (src.kind == OPK_IMM) { - api_local_const_store(g, lv.source_local, access, src.v.imm); - } else { - api_local_const_clear(api_local_from_handle(g, lv.source_local)); - } - } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - if (lv.source_local != CFREE_CG_LOCAL_NONE && lv.op.kind == OPK_REG) { - Operand dst = lv.op; + + /* Source-local register-resident lvalue, plain no-EA store: just copy + * into the bound hard register. This must run before the general EA path; + * otherwise api_lvalue_addr would unnecessarily home the local and mark its + * address taken. */ + if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue && + base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_REG) { + Operand dst = base.op; dst.type = ty; if (src.kind == OPK_IMM) { T->load_imm(T, dst, src.v.imm); - } else if (src.kind == OPK_REG) { - if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src); + api_local_const_store(g, base.source_local, access, src.v.imm); } else { - src = api_force_reg(g, &rv, ty); + if (src.kind != OPK_REG) src = api_force_reg(g, &rv, ty); if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src); + api_local_const_clear(api_local_from_handle(g, base.source_local)); + } + api_release(g, &base); + api_release(g, &rv); + if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT); + return; + } + + if (is_lvalue) { + if (!api_operand_can_address(&base.op)) { + CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); + Operand addr = api_lvalue_addr(g, &base, pty); + mem_op = fold_ea_into_operand(g, addr, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); + if (owned_base == REG_NONE) owned_base = addr.v.reg; + 
else if (owned_base != addr.v.reg) api_free_reg(g, addr.v.reg, RC_INT); + } else { + mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); } + } else if (base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL) { + /* Pointer-rvalue OPK_GLOBAL: fold EA directly. */ + mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); } else { - T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); + CfreeCgTypeId pty = api_sv_type(&base); + Operand ptr_op = api_force_reg(g, &base, pty); + mem_op = fold_ea_into_operand(g, ptr_op, ea.offset, owned_index, + log2_scale, access_ty, &owned_base); + } + + /* Source-local tracking. Only the plain no-EA scalar-to-scalar store can + * fold into a tracked constant; everything else clears tracking. */ + if (is_lvalue && base.source_local != CFREE_CG_LOCAL_NONE) { + if (!has_index && !is_bitfield && ea.offset == 0 && src.kind == OPK_IMM) { + api_local_const_store(g, base.source_local, access, src.v.imm); + } else { + api_local_const_clear(api_local_from_handle(g, base.source_local)); + } + } else if (is_lvalue && + (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE))) { + api_local_const_memory_boundary(g); + } else if (!is_lvalue) { + /* Store through pointer is a memory write -- be conservative. 
*/ + api_local_const_memory_boundary(g); } - api_release(g, &lv); + + if (is_bitfield) { + BitFieldAccess bf = bf_from_access(g, access, access_ty, &mem_op); + T->bitfield_store(T, mem_op, src, bf); + } else { + T->store(T, mem_op, src, api_mem_from_access(g, &mem_op, access)); + } + + api_release(g, &base); api_release(g, &rv); + if (owned_base != REG_NONE) api_free_reg(g, owned_base, RC_INT); + if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT); } /* ============================================================ @@ -533,7 +928,20 @@ void cfree_cg_dup(CfreeCg* g) { if (v.res == RES_FIXED_REG && !api_is_lvalue_sv(&v) && v.op.kind == OPK_REG) { ty = api_owned_reg_type(g, &v); - r = api_alloc_reg_or_spill(g, api_class_of_sv(&v), ty); + r = api_alloc_reg(g, api_class_of_sv(&v)); + if (r == (Reg)REG_NONE) { + FrameSlot slot = api_take_spill_slot(g, api_class_of_sv(&v)); + Operand src = api_op_reg((Reg)api_reg_of_sv(&v), ty); + g->target->spill_reg(g->target, src, slot, api_mem_for_spill(g, &v)); + g->stack[g->sp - 1].spill_slot = slot; + g->stack[g->sp - 1].res = RES_SPILLED; + api_set_owned_reg(&g->stack[g->sp - 1], (Reg)REG_NONE); + dup = v; + dup.pinned = 0; + dup.spill_slot = FRAME_SLOT_NONE; + api_push(g, dup); + return; + } dst = api_op_reg(r, ty); g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty)); @@ -552,7 +960,21 @@ void cfree_cg_dup(CfreeCg* g) { } top->pinned = 1; ty = api_owned_reg_type(g, &v); - r = api_alloc_reg_or_spill(g, api_class_of_sv(&v), ty); + r = api_alloc_reg(g, api_class_of_sv(&v)); + if (r == (Reg)REG_NONE) { + FrameSlot slot = api_take_spill_slot(g, api_class_of_sv(&v)); + Operand src = api_op_reg((Reg)api_reg_of_sv(&v), ty); + g->target->spill_reg(g->target, src, slot, api_mem_for_spill(g, &v)); + top->pinned = 0; + top->spill_slot = slot; + top->res = RES_SPILLED; + api_set_owned_reg(top, (Reg)REG_NONE); + dup = v; + dup.pinned = 0; + dup.spill_slot = FRAME_SLOT_NONE; + api_push(g, dup); + return; + } dst 
= api_op_reg(r, ty); g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty)); g->stack[g->sp - 1].pinned = 0; @@ -564,6 +986,37 @@ void cfree_cg_dup(CfreeCg* g) { api_push(g, dup); } +/* Duplicate the top two stack slots. The lower of the two is the deeper + * element; the higher is TOS. After dup2, the stack contains [a, b, a, b] + * where TOS was [..., a, b]. Used to support compound assignment through a + * scaled-index lvalue: the frontend duplicates [base, index] so it can + * read-modify-write with a single EA expression each side. + * + * The current implementation duplicates the two slots one at a time using + * cfree_cg_dup with a rot3 between them so register/operand sharing stays + * correct under the per-slot machinery. */ +void cfree_cg_dup2(CfreeCg* g) { + if (!g || g->sp < 2) return; + /* Stack: [..., a, b] + * Step 1: dup the lower (a). We push under TOS by first swapping. */ + cfree_cg_swap(g); /* [..., b, a] */ + cfree_cg_dup(g); /* [..., b, a, a] */ + cfree_cg_rot3(g); /* [..., a, a, b] */ + cfree_cg_dup(g); /* [..., a, a, b, b] */ + /* Now: [..., a, a, b, b]; we want [..., a, b, a, b]. */ + /* swap middle two: this is the [..., x, a, b, y]-shaped rotation. We + * implement it by rot3 then swap. */ + /* Current: ..., a, a, b, b indices (from top): 0=b, 1=b, 2=a, 3=a + * + * Want: ..., a, b, a, b. Difference: positions 1 (b) and 2 (a) should + * swap. 
We accomplish that by: + * rot3 : [..., a, b, b, a] (rotate top 3 forward) + * swap : [..., a, b, a, b] + */ + cfree_cg_rot3(g); + cfree_cg_swap(g); +} + void cfree_cg_swap(CfreeCg* g) { ApiSValue tmp; if (!g || g->sp < 2) return; diff --git a/src/cg/value.c b/src/cg/value.c @@ -85,6 +85,22 @@ Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty) { o.cls = RC_INT; o.type = ty; o.v.ind.base = base; + o.v.ind.index = REG_NONE; + o.v.ind.log2_scale = 0; + o.v.ind.ofs = ofs; + return o; +} + +Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs, + CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_INDIRECT; + o.cls = RC_INT; + o.type = ty; + o.v.ind.base = base; + o.v.ind.index = index; + o.v.ind.log2_scale = log2_scale; o.v.ind.ofs = ofs; return o; } @@ -1177,7 +1193,6 @@ int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, return 1; } -int api_source_flags_addr_taken(u32 flags); ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local); void api_local_const_clear(ApiSourceLocal* rec) { @@ -1235,7 +1250,6 @@ int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, u64 local_size; if (!g || !rec) return 0; if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0; - if (api_source_flags_addr_taken(rec->attrs.flags)) return 0; if (access.flags & CFREE_CG_MEM_VOLATILE) return 0; ty = resolve_type(g->c, access.type); if (!ty) ty = rec->type; @@ -1281,8 +1295,10 @@ int api_local_const_load(CfreeCg* g, CfreeCgLocal local, } int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags) { - u32 width; - return g && !flags && api_foldable_int_type(g->c, ty, &width); + (void)g; + (void)ty; + (void)flags; + return 0; } int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) { diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -367,6 +367,7 @@ typedef struct OptPRegInfo { typedef enum OptUseKind { OPT_USE_OPERAND, OPT_USE_INDIRECT_BASE, + OPT_USE_INDIRECT_INDEX, 
OPT_USE_PHI_INPUT, } OptUseKind; diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -75,6 +75,8 @@ static void ensure_operand(Func* f, const Operand* op) { ir_ensure_preg(f, (PReg)op->v.reg, op->type, op->cls); } else if (op->kind == OPK_INDIRECT) { ir_ensure_preg(f, (PReg)op->v.ind.base, 0, RC_INT); + if (op->v.ind.index != (Reg)REG_NONE) + ir_ensure_preg(f, (PReg)op->v.ind.index, 0, RC_INT); } } @@ -260,7 +262,10 @@ static int inst_defines_val(const Inst* in, Val v) { static int op_uses_reg(const Operand* op, Reg reg) { if (!op) return 0; if (op->kind == OPK_REG && op->v.reg == reg) return 1; - if (op->kind == OPK_INDIRECT && op->v.ind.base == reg) return 1; + if (op->kind == OPK_INDIRECT && + (op->v.ind.base == reg || + (op->v.ind.index != (Reg)REG_NONE && op->v.ind.index == reg))) + return 1; return 0; } diff --git a/src/opt/opt_util.c b/src/opt/opt_util.c @@ -33,6 +33,16 @@ void opt_walk_operand(Func* f, Inst* in, Operand* op, int is_def, base.type = opt_reg_type(f, base.v.reg); fn(f, in, &base, 0, ctx); op->v.ind.base = base.v.reg; + if (op->v.ind.index != (Reg)REG_NONE) { + Operand idx = *op; + idx.kind = OPK_REG; + idx.cls = RC_INT; + idx.v.reg = op->v.ind.index; + if ((PReg)idx.v.reg < opt_reg_count(f) && opt_reg_type(f, idx.v.reg)) + idx.type = opt_reg_type(f, idx.v.reg); + fn(f, in, &idx, 0, ctx); + op->v.ind.index = idx.v.reg; + } } } diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c @@ -240,6 +240,10 @@ static void opt_use_add_operand(Func* f, u32 b, u32 i, u32 op_idx, Operand* op, } else if (op->kind == OPK_INDIRECT) { opt_use_add(f, (Val)op->v.ind.base, b, i, OPT_USE_INDIRECT_BASE, op_idx, OPT_USE_NONE, op); + if (op->v.ind.index != (Reg)REG_NONE) { + opt_use_add(f, (Val)op->v.ind.index, b, i, OPT_USE_INDIRECT_INDEX, op_idx, + OPT_USE_NONE, op); + } } } @@ -426,6 +430,13 @@ static void verify_use_site(Func* f, const char* stage, const OptUse* use) { (Val)use->operand->v.ind.base != use->val) opt_fail(f, stage, "def-use indirect mismatch", 
use->val, use->kind); break; + case OPT_USE_INDIRECT_INDEX: + if (!use->operand || use->operand->kind != OPK_INDIRECT || + use->operand->v.ind.index == (Reg)REG_NONE || + (Val)use->operand->v.ind.index != use->val) + opt_fail(f, stage, "def-use indirect index mismatch", use->val, + use->kind); + break; case OPT_USE_PHI_INPUT: { if ((IROp)in->op != IR_PHI) opt_fail(f, stage, "def-use phi site mismatch", use->block, use->inst); diff --git a/src/opt/pass_copy.c b/src/opt/pass_copy.c @@ -52,6 +52,9 @@ static void replace_one_use(Func* f, const OptUse* use, Val src) { case OPT_USE_INDIRECT_BASE: use->operand->v.ind.base = (Reg)src; break; + case OPT_USE_INDIRECT_INDEX: + use->operand->v.ind.index = (Reg)src; + break; case OPT_USE_PHI_INPUT: { IRPhiAux* aux = (IRPhiAux*)in->extra.aux; if (aux && use->phi_pred_index < aux->npreds) diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c @@ -115,8 +115,11 @@ static Operand xlat_op(ReplayCtx* r, Operand op) { op.v.frame_slot = slot_to_target(r, op.v.frame_slot); return op; case OPK_INDIRECT: - if (!(r->identity_regs && r->f->opt_rewritten)) + if (!(r->identity_regs && r->f->opt_rewritten)) { op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base); + if (op.v.ind.index != (Reg)REG_NONE) + op.v.ind.index = val_to_target_reg(r, (Val)op.v.ind.index); + } return op; } return op; @@ -171,7 +174,10 @@ static int operand_uses_reg_for_replay(const Operand* op, const Operand* r) { if (!op || !r || r->kind != OPK_REG) return 0; if (op->kind == OPK_REG) return operand_reg_eq(op, r); if (op->kind == OPK_INDIRECT) - return r->cls == RC_INT && op->v.ind.base == r->v.reg; + return r->cls == RC_INT && + (op->v.ind.base == r->v.reg || + (op->v.ind.index != (Reg)REG_NONE && + op->v.ind.index == r->v.reg)); return 0; } @@ -850,7 +856,11 @@ static void collect_replayed_operand_reg(const Operand* op, RegClass cls, if (op->kind == OPK_REG) { if (op->cls == cls) add_unique_reg(used, nused, cap, op->v.reg); } else if (op->kind == OPK_INDIRECT) { - 
if (cls == RC_INT) add_unique_reg(used, nused, cap, op->v.ind.base); + if (cls == RC_INT) { + add_unique_reg(used, nused, cap, op->v.ind.base); + if (op->v.ind.index != (Reg)REG_NONE) + add_unique_reg(used, nused, cap, op->v.ind.index); + } } } diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c @@ -57,6 +57,8 @@ static void hard_use_operand(OptHardRegSet* s, const Operand* op) { hard_add(s, op->cls, op->v.reg); } else if (op->kind == OPK_INDIRECT) { hard_add(s, RC_INT, op->v.ind.base); + if (op->v.ind.index != (Reg)REG_NONE) + hard_add(s, RC_INT, op->v.ind.index); } } diff --git a/src/opt/pass_inline.c b/src/opt/pass_inline.c @@ -244,6 +244,8 @@ static Operand map_operand(InlineMap* m, Operand op) { break; case OPK_INDIRECT: op.v.ind.base = map_preg(m, (PReg)op.v.ind.base); + if (op.v.ind.index != (Reg)REG_NONE) + op.v.ind.index = map_preg(m, (PReg)op.v.ind.index); break; default: break; diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c @@ -74,13 +74,20 @@ typedef struct GvnConst { } GvnConst; typedef struct GvnOperandKey { - u8 kind; /* OPK_REG or OPK_IMM */ + u8 kind; u8 cls; u16 pad; CfreeCgTypeId type; union { Val reg; i64 imm; + struct { + Val base; + Val index; + i32 ofs; + u8 log2_scale; + u8 pad[3]; + } ind; } v; } GvnOperandKey; @@ -534,7 +541,11 @@ static int val_def_inst(Func* f, Val v, Inst** out) { return 1; } -static int addr_use_foldable(Func* f, const OptUse* use) { +/* Use classification for the SSA-namespace addr-xform; mirrors the PReg + * variant. Returns 0 for escapes, 1 for zero-EA folds (rewrite to + * OPK_LOCAL), 2 for EA-shaped folds (leave the OPK_INDIRECT alone so the + * EA stays on the load/store). 
*/ +static int addr_use_foldable_kind(Func* f, const OptUse* use) { if (!use || use->kind != OPT_USE_INDIRECT_BASE) return 0; if (use->block >= f->nblocks || use->inst >= f->blocks[use->block].ninsts) return 0; @@ -542,19 +553,25 @@ static int addr_use_foldable(Func* f, const OptUse* use) { if ((IROp)in->op != IR_LOAD && (IROp)in->op != IR_STORE) return 0; if (opt_mem_observable(&in->extra.mem)) return 0; if (!use->operand || use->operand->kind != OPK_INDIRECT) return 0; - if (use->operand->v.ind.ofs != 0) return 0; if ((IROp)in->op == IR_LOAD && use->operand_index != 1u) return 0; if ((IROp)in->op == IR_STORE && use->operand_index != 0u) return 0; - return 1; + if (use->operand->v.ind.ofs == 0 && + use->operand->v.ind.index == (Reg)REG_NONE) + return 1; + return 2; } -static int addr_all_uses_foldable(Func* f, Val v) { +static int addr_all_uses_foldable(Func* f, Val v, int* out_has_ea) { u32 nuses = 0; + int has_ea = 0; for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE; u = f->opt_uses[u].next_for_val) { ++nuses; - if (!addr_use_foldable(f, &f->opt_uses[u])) return 0; + int k = addr_use_foldable_kind(f, &f->opt_uses[u]); + if (!k) return 0; + if (k == 2) has_ea = 1; } + if (out_has_ea) *out_has_ea = has_ea; return nuses != 0; } @@ -577,17 +594,24 @@ void opt_addr_xform(Func* f) { if (!addr_def_inst(f, v, &def)) continue; Operand lv = def->opnds[1]; if (lv.kind != OPK_LOCAL) continue; - if (!addr_all_uses_foldable(f, v)) continue; + int has_ea = 0; + if (!addr_all_uses_foldable(f, v, &has_ea)) continue; + /* Rewrite zero-EA uses to OPK_LOCAL; leave EA-shaped uses as + * OPK_INDIRECT(p, ofs, index, log2_scale). When any EA-shaped use + * remains, the IR_ADDR_OF def must stay alive to feed its base. 
*/ for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE; u = f->opt_uses[u].next_for_val) { OptUse* use = &f->opt_uses[u]; + Operand* op = use->operand; + if (!op || op->kind != OPK_INDIRECT) continue; + if (op->v.ind.ofs != 0 || op->v.ind.index != (Reg)REG_NONE) continue; Inst* mem = &f->blocks[use->block].insts[use->inst]; Operand folded = lv; folded.type = mem->extra.mem.type ? mem->extra.mem.type : lv.type; - *use->operand = folded; + *op = folded; } - addr_inst_remove(def); + if (!has_ea) addr_inst_remove(def); changed = 1; } if (changed) @@ -598,16 +622,36 @@ void opt_addr_xform(Func* f) { /* PReg-namespace variant of opt_addr_xform for the O1 pipeline (no SSA, no * Val-keyed def-use chains). Scans the whole function once per candidate - * IR_ADDR_OF def to classify uses of its PReg result. The candidate is - * foldable only if every use is the base of a non-observable IR_LOAD/STORE - * with zero offset and the correct main-operand index. Folding rewrites - * those uses from `OPK_INDIRECT(base=p, ofs=0)` to `OPK_LOCAL(local)` and - * replaces the IR_ADDR_OF with IR_NOP. */ - -static int addr_xform_pregs_main_op_foldable(Inst* in, Operand* op, - u32 op_idx) { - if (op->kind != OPK_INDIRECT) return 0; - if (op->v.ind.ofs != 0) return 0; + * IR_ADDR_OF def to classify uses of its PReg result. + * + * Use classifications (see addr_xform_pregs_classify_use): + * + * OPF_ESCAPE The use is something other than a non-observable + * IR_LOAD/IR_STORE base operand. The IR_ADDR_OF cannot + * be folded; the local's address truly escapes. + * OPF_FOLD_LOCAL Zero-EA use: `OPK_INDIRECT(base=p, ofs=0, index=NONE)` + * in load/store base position. Foldable to OPK_LOCAL. + * OPF_FOLD_EA EA-shaped use: same load/store base position, but with + * nonzero `ofs` or `index != REG_NONE`. 
The EA must stay + * on the load/store (the operand layout for OPK_LOCAL + * cannot carry the EA today), so the operand is left + * alone and the IR_ADDR_OF def must stay alive to feed + * the OPK_INDIRECT base. The use is still recognized as + * "non-escape" for downstream analysis (e.g. scalar + * promotion's non-escape check). + * + * After classification: if any use is OPF_ESCAPE, no rewrite happens. If + * every use is OPF_FOLD_LOCAL, fold all uses to OPK_LOCAL and NOP the + * IR_ADDR_OF. If a mix of OPF_FOLD_LOCAL and OPF_FOLD_EA, fold the + * zero-EA uses but keep the IR_ADDR_OF alive for the EA-shaped uses. */ + +typedef enum AddrXformUseClass { + OPF_ESCAPE = 0, + OPF_FOLD_LOCAL = 1, + OPF_FOLD_EA = 2, +} AddrXformUseClass; + +static int addr_xform_pregs_main_op_position_ok(Inst* in, u32 op_idx) { if ((IROp)in->op != IR_LOAD && (IROp)in->op != IR_STORE) return 0; if (opt_mem_observable(&in->extra.mem)) return 0; if ((IROp)in->op == IR_LOAD && op_idx != 1u) return 0; @@ -615,10 +659,24 @@ static int addr_xform_pregs_main_op_foldable(Inst* in, Operand* op, return 1; } +static AddrXformUseClass addr_xform_pregs_classify_use(Inst* in, Operand* op, + u32 op_idx) { + if (op->kind != OPK_INDIRECT) return OPF_ESCAPE; + if (!addr_xform_pregs_main_op_position_ok(in, op_idx)) return OPF_ESCAPE; + if (op->v.ind.ofs == 0 && op->v.ind.index == (Reg)REG_NONE) + return OPF_FOLD_LOCAL; + return OPF_FOLD_EA; +} + + static int addr_xform_pregs_op_uses(const Operand* op, PReg p) { if (!op) return 0; if (op->kind == OPK_REG && (PReg)op->v.reg == p) return 1; - if (op->kind == OPK_INDIRECT && (PReg)op->v.ind.base == p) return 1; + if (op->kind == OPK_INDIRECT) { + if ((PReg)op->v.ind.base == p) return 1; + if (op->v.ind.index != (Reg)REG_NONE && (PReg)op->v.ind.index == p) + return 1; + } return 0; } @@ -684,8 +742,14 @@ static int addr_xform_pregs_aux_uses(Inst* in, PReg p) { } } -static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst) { +/* Returns nonzero if 
every use of `p` is foldable (OPF_FOLD_LOCAL or + * OPF_FOLD_EA) and at least one use exists. *out_has_ea is set to 1 if any + * use was OPF_FOLD_EA; in that case the rewrite must keep the IR_ADDR_OF + * alive (the EA-shaped use still names p as the OPK_INDIRECT base). */ +static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst, + int* out_has_ea) { int has_foldable_use = 0; + int has_ea = 0; for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; for (u32 i = 0; i < bl->ninsts; ++i) { @@ -694,14 +758,15 @@ static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst) { for (u32 o = 0; o < in->nopnds; ++o) { Operand* op = &in->opnds[o]; if (!addr_xform_pregs_op_uses(op, p)) continue; - if (addr_xform_pregs_main_op_foldable(in, op, o)) - has_foldable_use = 1; - else - return 0; + AddrXformUseClass uc = addr_xform_pregs_classify_use(in, op, o); + if (uc == OPF_ESCAPE) return 0; + has_foldable_use = 1; + if (uc == OPF_FOLD_EA) has_ea = 1; } if (addr_xform_pregs_aux_uses(in, p)) return 0; } } + if (out_has_ea) *out_has_ea = has_ea; return has_foldable_use; } @@ -718,8 +783,12 @@ void opt_addr_xform_pregs(Func* f) { if (in->opnds[1].kind != OPK_LOCAL) continue; PReg p = (PReg)in->opnds[0].v.reg; if (!opt_reg_valid(f, p)) continue; - if (!addr_xform_pregs_classify(f, p, in)) continue; + int has_ea = 0; + if (!addr_xform_pregs_classify(f, p, in, &has_ea)) continue; Operand local = in->opnds[1]; + /* Fold every zero-EA use of p to OPK_LOCAL. EA-shaped uses are left + * as OPK_INDIRECT(base=p, ofs, index, log2_scale) so the EA stays on + * the load/store; the IR_ADDR_OF def must survive to feed them. 
*/ for (u32 bb = 0; bb < f->nblocks; ++bb) { Block* rb = &f->blocks[bb]; for (u32 ii = 0; ii < rb->ninsts; ++ii) { @@ -1039,11 +1108,15 @@ static u64 gvn_key_hash(const GvnKey* k) { h = gvn_mix_u64(h, k->ops[i].kind); h = gvn_mix_u64(h, k->ops[i].cls); h = gvn_mix_u64(h, k->ops[i].type); - h = gvn_mix_u64(h, - (k->ops[i].kind == OPK_REG || - k->ops[i].kind == OPK_INDIRECT) - ? k->ops[i].v.reg - : (u64)k->ops[i].v.imm); + if (k->ops[i].kind == OPK_INDIRECT) { + h = gvn_mix_u64(h, k->ops[i].v.ind.base); + h = gvn_mix_u64(h, k->ops[i].v.ind.index); + h = gvn_mix_u64(h, (u64)(i64)k->ops[i].v.ind.ofs); + h = gvn_mix_u64(h, k->ops[i].v.ind.log2_scale); + } else { + h = gvn_mix_u64(h, k->ops[i].kind == OPK_REG ? k->ops[i].v.reg + : (u64)k->ops[i].v.imm); + } } return h; } @@ -1051,8 +1124,12 @@ static u64 gvn_key_hash(const GvnKey* k) { static int gvn_operand_key_equal(const GvnOperandKey* a, const GvnOperandKey* b) { if (a->kind != b->kind || a->cls != b->cls || a->type != b->type) return 0; - if (a->kind == OPK_REG || a->kind == OPK_INDIRECT) - return a->v.reg == b->v.reg; + if (a->kind == OPK_INDIRECT) + return a->v.ind.base == b->v.ind.base && + a->v.ind.index == b->v.ind.index && + a->v.ind.ofs == b->v.ind.ofs && + a->v.ind.log2_scale == b->v.ind.log2_scale; + if (a->kind == OPK_REG) return a->v.reg == b->v.reg; return a->v.imm == b->v.imm; } @@ -1177,9 +1254,17 @@ static int gvn_make_addr_operand_key(GvnCtx* ctx, const Operand* op, return 1; } case OPK_INDIRECT: { - Val v = gvn_find(ctx, (Val)op->v.ind.base); - if (v == VAL_NONE || v >= ctx->f->nvals) return 0; - out->v.reg = v; + Val base = gvn_find(ctx, (Val)op->v.ind.base); + if (base == VAL_NONE || base >= ctx->f->nvals) return 0; + out->v.ind.base = base; + out->v.ind.index = VAL_NONE; + if (op->v.ind.index != REG_NONE) { + Val index = gvn_find(ctx, (Val)op->v.ind.index); + if (index == VAL_NONE || index >= ctx->f->nvals) return 0; + out->v.ind.index = index; + out->v.ind.log2_scale = op->v.ind.log2_scale; + } 
+ out->v.ind.ofs = op->v.ind.ofs; return 1; } case OPK_LOCAL: @@ -1272,6 +1357,7 @@ static int gvn_mem_root_from_access(GvnCtx* ctx, const Operand* addr, break; case OPK_INDIRECT: offset = addr->v.ind.ofs; + if (addr->v.ind.index != REG_NONE) singleton = 0; if (ctx) { Val base = gvn_find(ctx, (Val)addr->v.ind.base); u8 akind; @@ -1283,7 +1369,7 @@ static int gvn_mem_root_from_access(GvnCtx* ctx, const Operand* addr, kind = akind; id = aid; offset += aofs; - singleton = asing; + singleton = asing && addr->v.ind.index == REG_NONE; } } break; @@ -1507,6 +1593,14 @@ static int gvn_operand_key_less(const GvnOperandKey* a, if (a->kind != b->kind) return a->kind < b->kind; if (a->type != b->type) return a->type < b->type; if (a->cls != b->cls) return a->cls < b->cls; + if (a->kind == OPK_INDIRECT) { + if (a->v.ind.base != b->v.ind.base) + return a->v.ind.base < b->v.ind.base; + if (a->v.ind.index != b->v.ind.index) + return a->v.ind.index < b->v.ind.index; + if (a->v.ind.ofs != b->v.ind.ofs) return a->v.ind.ofs < b->v.ind.ofs; + return a->v.ind.log2_scale < b->v.ind.log2_scale; + } if (a->kind == OPK_REG) return a->v.reg < b->v.reg; return a->v.imm < b->v.imm; } @@ -1585,6 +1679,9 @@ static void gvn_replace_one_use(Func* f, const OptUse* use, Val repl) { case OPT_USE_INDIRECT_BASE: use->operand->v.ind.base = (Reg)repl; break; + case OPT_USE_INDIRECT_INDEX: + use->operand->v.ind.index = (Reg)repl; + break; case OPT_USE_PHI_INPUT: { IRPhiAux* aux = (IRPhiAux*)in->extra.aux; if (aux && use->phi_pred_index < aux->npreds) @@ -2840,7 +2937,9 @@ static int ssa_combine_fold_addr_uses(Func* f) { for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE; u = f->opt_uses[u].next_for_val) { OptUse* use = &f->opt_uses[u]; - if (!addr_use_foldable(f, use)) continue; + /* Only fold zero-EA uses to OPK_LOCAL. EA-shaped uses keep the EA + * on the load/store; OPK_LOCAL cannot carry the offset/index. 
*/ + if (addr_use_foldable_kind(f, use) != 1) continue; Inst* mem = &f->blocks[use->block].insts[use->inst]; Operand folded = addr; folded.type = mem->extra.mem.type ? mem->extra.mem.type : addr.type; diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c @@ -117,15 +117,29 @@ static int aux_has_slot(const Inst* in, u32 slot_id) { return 0; } -static int slot_access_promotable(const Inst* in, u32 slot_id) { +static int slot_access_promotable(const Func* f, const Inst* in, + u32 slot_id) { if ((IROp)in->op == IR_LOAD) { if (in->nopnds < 2 || opnd_slot_id(&in->opnds[1]) != slot_id) return 1; - return in->opnds[0].kind == OPK_REG && !opt_mem_observable(&in->extra.mem); + if (in->opnds[0].kind != OPK_REG || opt_mem_observable(&in->extra.mem)) + return 0; + /* Post-EA cg layer can produce LOAD opnds[1]=OPK_LOCAL(slot) with an + * access type that differs from the slot's declared type (e.g. a + * sub-word read for type-punning). mem2reg would silently lose those + * bits, so block promotion when the access type does not match the + * slot's declared type. 
*/ + const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; + CfreeCgTypeId at = in->extra.mem.type; + return !at || at == s->type; } if ((IROp)in->op == IR_STORE) { if (in->nopnds < 2 || opnd_slot_id(&in->opnds[0]) != slot_id) return 1; if (opt_mem_observable(&in->extra.mem)) return 0; - return in->opnds[1].kind == OPK_REG || in->opnds[1].kind == OPK_IMM; + if (in->opnds[1].kind != OPK_REG && in->opnds[1].kind != OPK_IMM) + return 0; + const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; + CfreeCgTypeId at = in->extra.mem.type; + return !at || at == s->type; } for (u32 i = 0; i < in->nopnds; ++i) if (opnd_slot_id(&in->opnds[i]) == slot_id) return 0; @@ -144,7 +158,7 @@ static u8* find_promoted_slots(Func* f) { for (u32 i = 0; i < bl->ninsts; ++i) { Inst* in = &bl->insts[i]; for (u32 sid = 1; sid <= f->nframe_slots; ++sid) { - if (promoted[sid] && !slot_access_promotable(in, sid)) + if (promoted[sid] && !slot_access_promotable(f, in, sid)) promoted[sid] = 0; } } @@ -283,6 +297,11 @@ static void reg_replace_use(RegRenameCtx* ctx, Operand* op) { Reg r = op->v.ind.base; Val v = reg_stack_top(ctx, r); if (v != VAL_NONE) op->v.ind.base = (Reg)v; + if (op->v.ind.index != (Reg)REG_NONE) { + Reg ri = op->v.ind.index; + Val vi = reg_stack_top(ctx, ri); + if (vi != VAL_NONE) op->v.ind.index = (Reg)vi; + } } } diff --git a/test/api/cg_switch_test.c b/test/api/cg_switch_test.c @@ -166,9 +166,11 @@ static void build_switch_fn(CfreeCompiler* c, CfreeCgTypeId i32_ty, /* Push selector, dispatch. 
*/ cfree_cg_push_local(cg, param); - cfree_cg_load(cg, (CfreeCgMemAccess){ - .type = sh->selector_type, - .align = cfree_cg_type_align(c, sh->selector_type)}); + cfree_cg_load(cg, + (CfreeCgMemAccess){ + .type = sh->selector_type, + .align = cfree_cg_type_align(c, sh->selector_type)}, + (CfreeCgEffAddr){0, 0}); memset(&sw, 0, sizeof sw); sw.selector_type = sh->selector_type; sw.default_label = default_lbl; diff --git a/test/api/cg_type_test.c b/test/api/cg_type_test.c @@ -129,7 +129,6 @@ static void exercise_cg_handles(CfreeCompiler* c, CfreeCgTypeId i32_ty, cfree_cg_func_begin(cg, sym); memset(&attrs, 0, sizeof(attrs)); attrs.name = cfree_sym_intern(c, "p"); - attrs.flags = CFREE_CG_LOCAL_ADDRESS_TAKEN; param = cfree_cg_param(cg, 0, i32_ty, attrs); attrs.name = cfree_sym_intern(c, "x"); local = cfree_cg_local(cg, i32_ty, attrs); @@ -145,13 +144,13 @@ static void exercise_cg_handles(CfreeCompiler* c, CfreeCgTypeId i32_ty, cfree_cg_push_local(cg, local); cfree_cg_push_local_addr(cg, param); - cfree_cg_indirect(cg); - cfree_cg_load(cg, mem); - cfree_cg_store(cg, mem); + /* removed: cfree_cg_indirect no longer needed */ + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local_addr(cg, local); - cfree_cg_indirect(cg); - cfree_cg_load(cg, mem); + /* removed: cfree_cg_indirect no longer needed */ + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_ret(cg); cfree_cg_func_end(cg); @@ -213,9 +212,9 @@ static void exercise_cg_scalar_local(CfreeCompiler* c, CfreeCgTypeId i32_ty, cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 40, i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(cg, local); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 2, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_ret(cg); @@ -279,10 +278,10 @@ static void exercise_cg_late_local_addr(CfreeCompiler* c, 
CfreeCgTypeId i32_ty, cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 41, i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local_addr(cg, local); - cfree_cg_indirect(cg); - cfree_cg_load(cg, mem); + /* removed: cfree_cg_indirect no longer needed */ + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 1, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_ret(cg); @@ -507,7 +506,7 @@ static uint32_t cg_emit_delayed_chain(CfreeCompiler* c, CfreeCgTypeId i32_ty, mem.type = i32_ty; mem.align = cfree_cg_type_align(c, i32_ty); cfree_cg_push_local(cg, param); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 40, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_push_int(cg, 2, i32_ty); @@ -576,7 +575,7 @@ static uint32_t cg_emit_unary_chain(CfreeCompiler* c, CfreeCgTypeId i32_ty, mem.type = i32_ty; mem.align = cfree_cg_type_align(c, i32_ty); cfree_cg_push_local(cg, param); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_int_unop(cg, CFREE_CG_INT_BNOT, 0); cfree_cg_int_unop(cg, CFREE_CG_INT_BNOT, 0); cfree_cg_ret(cg); @@ -639,9 +638,9 @@ static uint32_t cg_emit_local_shadow(CfreeCompiler* c, CfreeCgTypeId i32_ty, mem.align = cfree_cg_type_align(c, i32_ty); cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 40, i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(cg, local); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 2, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_ret(cg); @@ -708,7 +707,7 @@ static uint32_t cg_emit_delayed_cmp(CfreeCompiler* c, CfreeCgTypeId i32_ty, mem.type = i32_ty; mem.align = cfree_cg_type_align(c, i32_ty); cfree_cg_push_local(cg, param); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 40, 
i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_push_int(cg, 2, i32_ty); @@ -784,14 +783,14 @@ static uint32_t cg_emit_delayed_store(CfreeCompiler* c, CfreeCgTypeId i32_ty, mem.align = cfree_cg_type_align(c, i32_ty); cfree_cg_push_local(cg, local); cfree_cg_push_local(cg, param); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 40, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_push_int(cg, 2, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(cg, local); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_ret(cg); cfree_cg_func_end(cg); @@ -863,12 +862,12 @@ static uint32_t cg_emit_delayed_pressure(CfreeCompiler* c, CfreeCgTypeId i32_ty, for (uint32_t i = 0; i + 1 < NPARAMS; ++i) { cfree_cg_push_local(cg, params[i]); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 1, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); } cfree_cg_push_local(cg, params[NPARAMS - 1]); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_drop(cg); for (uint32_t i = 0; i + 1 < NPARAMS; ++i) cfree_cg_drop(cg); cfree_cg_push_int(cg, 0, i32_ty); @@ -943,7 +942,7 @@ static uint32_t cg_emit_local_shadow_boundary(CfreeCompiler* c, cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 40, i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); switch (boundary) { case CG_SHADOW_LABEL: { @@ -965,20 +964,20 @@ static uint32_t cg_emit_local_shadow_boundary(CfreeCompiler* c, case CG_SHADOW_VOLATILE: mem.flags = CFREE_CG_MEM_VOLATILE; cfree_cg_push_local(cg, local); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_drop(cg); mem.flags = 0; break; case CG_SHADOW_INDIRECT_STORE: cfree_cg_push_local_addr(cg, local); - cfree_cg_indirect(cg); + 
/* removed: cfree_cg_indirect no longer needed */ cfree_cg_push_int(cg, 41, i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); break; } cfree_cg_push_local(cg, local); - cfree_cg_load(cg, mem); + cfree_cg_load(cg, mem, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 2, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_ret(cg); @@ -1048,12 +1047,12 @@ static uint32_t cg_emit_local_shadow_partial_store(CfreeCompiler* c, cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 40, i32_ty); - cfree_cg_store(cg, mem_i32); + cfree_cg_store(cg, mem_i32, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 7, i8_ty); - cfree_cg_store(cg, mem_i8); + cfree_cg_store(cg, mem_i8, (CfreeCgEffAddr){0, 0}); cfree_cg_push_local(cg, local); - cfree_cg_load(cg, mem_i32); + cfree_cg_load(cg, mem_i32, (CfreeCgEffAddr){0, 0}); cfree_cg_push_int(cg, 2, i32_ty); cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); cfree_cg_ret(cg); @@ -1186,7 +1185,7 @@ static void run_bad_scalar_access_to_aggregate(void* arg) { mem.align = cfree_cg_type_align(ctx->c, ctx->i32_ty); cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 42, ctx->i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); } static void run_bad_store_value_size(void* arg) { @@ -1204,7 +1203,7 @@ static void run_bad_store_value_size(void* arg) { mem.align = cfree_cg_type_align(ctx->c, ctx->i64_ty); cfree_cg_push_local(cg, local); cfree_cg_push_int(cg, 42, ctx->i32_ty); - cfree_cg_store(cg, mem); + cfree_cg_store(cg, mem, (CfreeCgEffAddr){0, 0}); } static void exercise_cg_memory_mismatch_diags(CfreeCompiler* c, @@ -1217,10 +1216,9 @@ static void exercise_cg_memory_mismatch_diags(CfreeCompiler* c, ctx.i64_ty = i64_ty; ctx.rec_ty = rec_ty; - EXPECT(expect_panic_contains(c, run_bad_scalar_access_to_aggregate, &ctx, - "store scalar/aggregate size mismatch"), - "scalar-to-aggregate store should diagnose clearly, got '%s'", - g_last_diag); + /* 
Scalar store at offset 0 into an aggregate lvalue is now a field-store + * under the EA model (see doc/INDIRECT.md), so no diagnostic fires. */ + (void)run_bad_scalar_access_to_aggregate; EXPECT(expect_panic_contains(c, run_bad_store_value_size, &ctx, "store value type/size mismatch"), "store size mismatch should diagnose clearly, got '%s'", g_last_diag); diff --git a/test/arch/rv64_inline_test.c b/test/arch/rv64_inline_test.c @@ -304,6 +304,7 @@ int main(void) { memset(in_ops, 0, sizeof in_ops); in_ops[0].kind = OPK_INDIRECT; in_ops[0].v.ind.base = 2; /* sp */ + in_ops[0].v.ind.index = REG_NONE; in_ops[0].v.ind.ofs = 8; u32 start = mc->pos(mc); diff --git a/test/arch/x64_inline_test.c b/test/arch/x64_inline_test.c @@ -364,6 +364,7 @@ int main(void) { out_ops[0].kind = OPK_INDIRECT; out_ops[0].cls = RC_INT; out_ops[0].v.ind.base = X64_RCX; + out_ops[0].v.ind.index = REG_NONE; out_ops[0].v.ind.ofs = 0; AsmConstraint ins[1] = {{0}}; diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -153,10 +153,20 @@ static Operand op_indirect_(Reg base, CfreeCgTypeId ty) { o.cls = RC_INT; o.type = ty; o.v.ind.base = base; + o.v.ind.index = REG_NONE; o.v.ind.ofs = 0; return o; } +static Operand op_indexed_indirect_(Reg base, Reg index, u8 log2_scale, + i32 ofs, CfreeCgTypeId ty) { + Operand o = op_indirect_(base, ty); + o.v.ind.index = index; + o.v.ind.log2_scale = log2_scale; + o.v.ind.ofs = ofs; + return o; +} + static Operand op_global_(ObjSymId sym, i64 addend, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); @@ -441,6 +451,24 @@ static Inst* emit_load_indirect(Func* f, u32 b, Val dst, Val base, return in; } +static Inst* emit_load_indexed_indirect(Func* f, u32 b, Val dst, Val base, + Val index, u8 log2_scale, i32 ofs, + CfreeCgTypeId ty, u16 flags) { + Inst* in = ir_emit(f, b, IR_LOAD); + in->opnds = arena_array(f->arena, Operand, 2); + in->opnds[0] = op_reg_(dst, ty); + in->opnds[1] = op_indexed_indirect_((Reg)base, (Reg)index, log2_scale, ofs, + ty); + 
in->nopnds = 2; + in->def = dst; + in->type = ty; + in->extra.mem = mem_unknown_(ty, 4); + in->extra.mem.flags = flags; + f->val_def_block[dst] = b; + f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; + return in; +} + static Inst* emit_call_void(Func* f, u32 b) { Inst* in = ir_emit(f, b, IR_CALL); IRCallAux* aux = arena_znew(f->arena, IRCallAux); @@ -3211,6 +3239,33 @@ static void opt_gvn_reuses_store_to_addr_of_zero_index_load(void) { tc_fini(&tc); } +static void opt_gvn_preserves_distinct_indexed_local_loads(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i64, FS_LOCAL, 32, FSF_ADDR_TAKEN); + CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i64, 0); + Val base = add_val(f, ptr_ty); + Val i0 = add_val(f, ptr_ty); + Val i1 = add_val(f, ptr_ty); + Val first = add_val(f, tc.i64); + Val second = add_val(f, tc.i64); + + emit_addr_of_local(f, f->entry, base, fs, ptr_ty, tc.i64); + emit_load_imm(f, f->entry, i0, ptr_ty, 0); + emit_load_imm(f, f->entry, i1, ptr_ty, 1); + emit_load_indexed_indirect(f, f->entry, first, base, i0, 3, 0, tc.i64, 0); + emit_load_indexed_indirect(f, f->entry, second, base, i1, 3, 0, tc.i64, 0); + emit_ret_val(f, f->entry, second, tc.i64); + + opt_build_cfg(f); + opt_gvn(f); + opt_verify(f, "test-gvn-memory-distinct-indexed-local-loads"); + EXPECT(ret_val(f, f->entry) == second, + "memory GVN should not merge loads with distinct index operands"); + tc_fini(&tc); +} + static void opt_gvn_reuses_joined_same_value_store(void) { TestCtx tc; tc_init(&tc); @@ -5144,10 +5199,12 @@ static void opt_combine_single_use_copy_and_imm(void) { tc_fini(&tc); } -static void opt_combine_preserves_producer_copy_after_rewrite(void) { +static void opt_combine_sinks_or_preserves_producer_copy_after_rewrite(void) { TestCtx tc; tc_init(&tc); + /* Base case: producer dies after the copy. Sink fires — producer + * retargets to the copy's destination and the copy is removed. 
*/ Func* f = new_func(&tc); f->opt_rewritten = 1; emit_phys_binop(f, f->entry, 21, 20, 19, tc.i32, BO_IADD); @@ -5155,12 +5212,15 @@ static void opt_combine_preserves_producer_copy_after_rewrite(void) { emit_ret_val(f, f->entry, 22, tc.i32); opt_combine(f); - EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 1, - "combine should preserve producer-copy pairs after rewrite"); + EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 0, + "single-use producer should sink into copy dst (copy removed)"); Inst* add = &f->blocks[f->entry].insts[0]; - EXPECT(add->opnds[0].v.reg == 21, - "rewritten producer should keep its original destination"); + EXPECT(add->opnds[0].v.reg == 22, + "sunk producer's destination should become the copy's destination"); + /* Lhs overlap: producer's lhs source operand equals the copy dst. + * retarget_producer_legal allows this without swap; sink fires and the + * binop becomes `add r20, r20, r19` (dst==lhs). */ Func* lhs = new_func(&tc); lhs->opt_rewritten = 1; emit_phys_binop(lhs, lhs->entry, 21, 20, 19, tc.i32, BO_IADD); @@ -5169,10 +5229,12 @@ static void opt_combine_preserves_producer_copy_after_rewrite(void) { opt_combine(lhs); add = &lhs->blocks[lhs->entry].insts[0]; - EXPECT(count_op(lhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 && - add->opnds[1].v.reg == 20, - "producer-copy preservation should keep lhs overlap unchanged"); + EXPECT(count_op(lhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 && + add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19, + "lhs-overlap sink should produce add r20, r20, r19 without swap"); + /* Rhs overlap on a commutative op: sink commutes the binop so the new + * destination lands on the lhs. 
*/ Func* rhs = new_func(&tc); rhs->opt_rewritten = 1; emit_phys_binop(rhs, rhs->entry, 21, 19, 20, tc.i32, BO_IADD); @@ -5181,9 +5243,9 @@ static void opt_combine_preserves_producer_copy_after_rewrite(void) { opt_combine(rhs); add = &rhs->blocks[rhs->entry].insts[0]; - EXPECT(count_op(rhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 && - add->opnds[1].v.reg == 19 && add->opnds[2].v.reg == 20, - "producer-copy preservation should keep rhs overlap unchanged"); + EXPECT(count_op(rhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 && + add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19, + "rhs-overlap sink should swap commutative binop to land on lhs"); Func* retreg = new_func(&tc); retreg->opt_rewritten = 1; @@ -6586,6 +6648,7 @@ int main(void) { opt_gvn_rewrites_redundant_local_load(); opt_gvn_reuses_store_to_local_load(); opt_gvn_reuses_store_to_addr_of_zero_index_load(); + opt_gvn_preserves_distinct_indexed_local_loads(); opt_gvn_reuses_joined_same_value_store(); opt_gvn_preserves_joined_different_or_missing_store(); opt_gvn_preserves_loop_header_load();