Support explicit register asm operands - kit

commit 84863072e412579624a0d6c9f304344ebc4b1289
parent 2d37ba7b367e02cf86618cfa9e3a7a92d516e7ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  3 Jun 2026 14:53:02 -0700

Support explicit register asm operands

Diffstat:
M include/kit/cg.h  | 6 ++++++
M lang/c/parse/cg_adapter.c  | 2 ++
M lang/c/parse/cg_adapter.h  | 1 +
M lang/c/parse/parse.c  | 14 +++++++++++---
M lang/c/parse/parse_priv.h  | 6 ++++++
M lang/c/parse/parse_stmt.c  | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M lang/c/parse/parse_type.c  | 31 +++++++++++++++++++++++++------
M src/arch/aa64/native.c  | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M src/arch/c_target/c_emit.c  | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M src/arch/native_target.h  | 13 ++++++++++++-
M src/arch/rv64/native.c  | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M src/arch/wasm/emit.c  | 8 ++++++++
M src/arch/x64/emit.c  | 15 +--------------
M src/arch/x64/emit.h  | 3 ---
M src/arch/x64/native.c  | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M src/cg/asm.c  | 18 +++++++++++++++++-
M src/cg/cgtarget.h  | 4 ++++
M src/cg/native_asm.c  | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/cg/native_asm.h  | 25 +++++++++++++++++++++++++
M src/opt/pass_lower.c  | 7 ++++++-
M src/opt/pass_machinize.c  | 54 ++++++++++++++++++++++--------------------------------
M test/arch/x64_inline_test.c  | 27 +++++++++++++++++++++++++++
A test/parse/cases/asm_03_register_operand.c  | 37 +++++++++++++++++++++++++++++++++++++
A test/parse/cases/asm_03_register_operand.expected  | 1 +
A test/parse/cases/asm_03_register_operand.wasm.skip  | 1 +
A test/parse/cases/asm_04_register_callee_saved.c  | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/parse/cases/asm_04_register_callee_saved.expected  | 1 +
A test/parse/cases/asm_04_register_callee_saved.wasm.skip  | 1 +
A test/parse/cases/asm_05_register_label_scope.c  | 10 ++++++++++
A test/parse/cases/asm_05_register_label_scope.expected  | 1 +
A test/parse/cases_err/asm_register_bad_constraint.c  | 13 +++++++++++++
A test/parse/cases_err/asm_register_bad_constraint.errpat  | 1 +
A test/parse/cases_err/asm_register_bad_name.c  | 5 +++++
A test/parse/cases_err/asm_register_bad_name.errpat  | 1 +
A test/parse/cases_err/asm_register_class_mismatch.c  | 13 +++++++++++++
A test/parse/cases_err/asm_register_class_mismatch.errpat  | 1 +
A test/parse/cases_err/asm_register_forbidden.c  | 13 +++++++++++++
A test/parse/cases_err/asm_register_forbidden.errpat  | 1 +

38 files changed, 1063 insertions(+), 244 deletions(-)
diff --git a/include/kit/cg.h b/include/kit/cg.h
@@ -1032,6 +1032,12 @@ typedef struct KitCgAsmOperand {
   KitSym constraint; /* interned target constraint string */
   KitSym name;       /* interned symbolic operand name; 0 if absent */
   KitCgTypeId type;
+  /* Explicit hard register this operand must occupy, named by its target
+   * spelling ("r10", "x8", "a7", ...); 0 when unconstrained. Set by a frontend
+   * for a GNU local register variable (`register T x __asm__("r10")`) used as
+   * an operand. The name is opaque to the frontend and CG — only the target's
+   * register file resolves it to a physical register. */
+  KitSym reg;
   uint8_t dir; /* KitCgAsmDir */
   uint8_t pad[3];
 } KitCgAsmOperand;
diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c
@@ -1158,6 +1158,7 @@ void pcg_inline_asm(Parser* p, const char* tmpl, const AsmConstraint* outs,
           kit_sym_intern(p->c, kit_slice_cstr(outs[i].str ? outs[i].str : ""));
       o[i].name = outs[i].name;
       o[i].type = pcg_tid(p, outs[i].type);
+      o[i].reg = outs[i].reg;
       o[i].dir = KIT_CG_ASM_OUT;
     }
   }
@@ -1168,6 +1169,7 @@ void pcg_inline_asm(Parser* p, const char* tmpl, const AsmConstraint* outs,
           kit_sym_intern(p->c, kit_slice_cstr(ins[i].str ? ins[i].str : ""));
       in[i].name = ins[i].name;
       in[i].type = pcg_tid(p, ins[i].type);
+      in[i].reg = ins[i].reg;
       in[i].dir = (ins[i].dir == ASM_INOUT) ? KIT_CG_ASM_INOUT : KIT_CG_ASM_IN;
     }
   }
diff --git a/lang/c/parse/cg_adapter.h b/lang/c/parse/cg_adapter.h
@@ -171,6 +171,7 @@ typedef struct AsmConstraint {
   const char* str;
   Sym name;
   const Type* type;
+  Sym reg; /* hard-register name for a GNU local register variable; 0 = none */
   u8 dir;
   u8 pad[3];
 } AsmConstraint;
diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c
@@ -695,7 +695,9 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
 static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
   SrcLoc loc;
   Sym name;
-  const Type* var_ty = parse_declarator(p, specs->type, &name, &loc);
+  DeclaratorInfo dinfo;
+  const Type* var_ty = parse_declarator_full_info(
+      p, specs->type, /*allow_abstract=*/0, &name, &loc, NULL, &dinfo);
   if ((specs->flags & DF_THREAD) && specs->storage != DS_STATIC &&
       specs->storage != DS_EXTERN) {
     perr(p, "block-scope _Thread_local requires static or extern");
@@ -876,7 +878,10 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
       s = make_local_aligned(p, name, var_ty, loc, specs->align);
       if (specs->storage == DS_REGISTER) {
         SymEntry* e = scope_lookup_current(p, name);
-        if (e && e->kind == SEK_LOCAL) e->storage = DS_REGISTER;
+        if (e && e->kind == SEK_LOCAL) {
+          e->storage = DS_REGISTER;
+          e->reg_asm_name = dinfo.asm_label;
+        }
       }
       pcg_set_loc(p, loc);
       init_at(p, s, var_ty, 0, var_ty);
@@ -885,7 +890,10 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
     s = make_local_aligned(p, name, var_ty, loc, specs->align);
     if (specs->storage == DS_REGISTER) {
       SymEntry* e = scope_lookup_current(p, name);
-      if (e && e->kind == SEK_LOCAL) e->storage = DS_REGISTER;
+      if (e && e->kind == SEK_LOCAL) {
+        e->storage = DS_REGISTER;
+        e->reg_asm_name = dinfo.asm_label;
+      }
     }
     if (accept_punct(p, '=')) {
       pcg_set_loc(p, loc);
diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h
@@ -134,6 +134,11 @@ struct SymEntry {
   FrameSlot vla_byte_slot;
   VLABound* vla_bounds;
   struct Attr* attrs;
+  /* For a `register T x __asm__("reg")` local: the interned hard-register name
+   * ("r10", "x8", ...) the variable is bound to. Pins x to that register when
+   * used as an inline-asm operand (GNU explicit register variables). 0 = none.
+   */
+  Sym reg_asm_name;
   SymEntry* next;
 };
 
@@ -473,6 +478,7 @@ typedef struct DeclaratorInfo {
   ParamInfo* fn_params;
   u16 fn_nparams;
   u8 fn_variadic;
+  Sym asm_label;
 } DeclaratorInfo;
 
 /* ============================================================
diff --git a/lang/c/parse/parse_stmt.c b/lang/c/parse/parse_stmt.c
@@ -517,10 +517,28 @@ void parse_static_assert(Parser* p) {
  * already been consumed by parse_stmt. */
 typedef struct AsmOutLValue {
   FrameSlot addr_slot; /* temp local holding the lvalue's address (indirect case) */
+  FrameSlot value_slot; /* the variable's own slot; valid when direct_local is set */
   const Type* ptr_ty; /* type of the pointer stored in addr_slot */
   const Type* val_ty; /* type of the value written back from the asm output */
+  u8 direct_local; /* 1: write back straight into value_slot, bypassing addr_slot */
+  u8 pad[3];
 } AsmOutLValue;
 
+static void asm_out_lvalue_push(Parser* p, const AsmOutLValue* lv) { /* push the output's store target */
+  if (lv->direct_local) { /* pinned register local: its own slot is the target */
+    pcg_push_local_typed(p, lv->value_slot, lv->val_ty);
+    return;
+  }
+  pcg_push_local_typed(p, lv->addr_slot, lv->ptr_ty); /* otherwise reload the saved address */
+  pcg_load(p);
+  pcg_deref(p, lv->val_ty); /* callers store through (or load from) the result */
+}
+
+static void asm_out_value_push(Parser* p, const AsmOutLValue* lv) { /* push the output's current value */
+  asm_out_lvalue_push(p, lv);
+  pcg_load(p); /* read through the lvalue pushed above */
+}
+
 static Sym parse_asm_operand_name(Parser* p) {
   Sym name = 0;
   if (!is_punct(&p->cur, '[')) return 0;
@@ -554,6 +572,27 @@ static const char* parse_asm_str(Parser* p, const char* what) {
   return kit_sym_str(p->pool->c, s).s;
 }
 
+/* GNU local register variables: when an asm operand is exactly a bare reference
+ * to a `register T x __asm__("reg")` local, return that register name (else 0).
+ * Called with p->cur positioned at the first token of the operand expression,
+ * so it only peeks — it must not consume. The operand has to be a lone
+ * identifier (the canonical idiom); anything more complex is not a
+ * hard-register operand under GCC's rules either. The name is carried opaquely
+ * on the constraint's `reg` field; CG/native code validates that the constraint
+ * is a target register constraint and only the target resolves it to a
+ * register. */
+static Sym asm_operand_pinned_reg(Parser* p, FrameSlot* slot_out) {
+  Tok nxt;
+  SymEntry* e;
+  if (p->cur.kind != TOK_IDENT) return 0;
+  nxt = peek1(p);
+  if (!is_punct(&nxt, ')')) return 0; /* the identifier must be the whole operand */
+  e = scope_lookup(p, p->cur.v.ident);
+  if (!e || e->kind != SEK_LOCAL) return 0; /* unknown name, or not a local variable */
+  if (e->reg_asm_name && slot_out) *slot_out = e->v.slot; /* slot_out may be NULL; untouched when unpinned */
+  return e->reg_asm_name; /* 0 when the local has no register name recorded */
+}
+
 static void parse_asm_stmt(Parser* p) {
   const char* tmpl;
   AsmConstraint* outs = NULL;
@@ -592,8 +631,10 @@ static void parse_asm_stmt(Parser* p) {
         const Type* ptr_ty;
         FrameSlotDesc fsd;
         FrameSlot slot;
+        FrameSlot pinned_slot;
         memset(&c, 0, sizeof c);
         memset(&lv, 0, sizeof lv);
+        pinned_slot = FRAME_SLOT_NONE;
         c.name = parse_asm_operand_name(p);
         c.str = parse_asm_str(p, "asm output constraint");
         if (c.str && c.str[0] == '+')
@@ -601,25 +642,32 @@ static void parse_asm_stmt(Parser* p) {
         else
           c.dir = ASM_OUT;
         expect_punct(p, '(', "'(' before asm output lvalue");
+        c.reg = asm_operand_pinned_reg(p, &pinned_slot);
         parse_assign_expr(p);
         val_ty = pcg_top_type(p);
         if (!val_ty) perr(p, "asm output: cannot determine lvalue type");
         c.type = val_ty;
-        pcg_addr(p);
-        ptr_ty = pcg_top_type(p);
-        if (!ptr_ty) perr(p, "asm output: cannot take address");
-        memset(&fsd, 0, sizeof fsd);
-        fsd.type = ptr_ty;
-        fsd.size = 8;
-        fsd.align = 8;
-        fsd.kind = FS_LOCAL;
-        slot = pcg_local(p, &fsd);
-        pcg_push_local_typed(p, slot, ptr_ty);
-        pcg_swap(p);
-        pcg_store(p);
-        pcg_drop(p);
-        lv.addr_slot = slot;
-        lv.ptr_ty = ptr_ty;
+        if (c.reg && pinned_slot != FRAME_SLOT_NONE) {
+          pcg_drop(p);
+          lv.direct_local = 1;
+          lv.value_slot = pinned_slot;
+        } else {
+          pcg_addr(p);
+          ptr_ty = pcg_top_type(p);
+          if (!ptr_ty) perr(p, "asm output: cannot take address");
+          memset(&fsd, 0, sizeof fsd);
+          fsd.type = ptr_ty;
+          fsd.size = 8;
+          fsd.align = 8;
+          fsd.kind = FS_LOCAL;
+          slot = pcg_local(p, &fsd);
+          pcg_push_local_typed(p, slot, ptr_ty);
+          pcg_swap(p);
+          pcg_store(p);
+          pcg_drop(p);
+          lv.addr_slot = slot;
+          lv.ptr_ty = ptr_ty;
+        }
         lv.val_ty = val_ty;
         expect_punct(p, ')', "')' after asm output lvalue");
         if (nout == cap_out) {
@@ -653,6 +701,7 @@ static void parse_asm_stmt(Parser* p) {
           c.str = parse_asm_str(p, "asm input constraint");
           c.dir = ASM_IN;
           expect_punct(p, '(', "'(' before asm input expression");
+          c.reg = asm_operand_pinned_reg(p, NULL);
           parse_assign_expr(p);
           to_rvalue(p);
           c.type = pcg_top_type(p);
@@ -736,10 +785,7 @@ static void parse_asm_stmt(Parser* p) {
              "matching-digit syntax");
       }
       AsmOutLValue* lv = &out_lvs[i];
-      pcg_push_local_typed(p, lv->addr_slot, lv->ptr_ty);
-      pcg_load(p);
-      pcg_deref(p, lv->val_ty);
-      pcg_load(p);
+      asm_out_value_push(p, lv);
       AsmConstraint mc;
       memset(&mc, 0, sizeof mc);
       mc.str = k_match_strs[i];
@@ -756,9 +802,7 @@ static void parse_asm_stmt(Parser* p) {
     u32 i;
     for (i = nout; i-- > 0;) {
       AsmOutLValue* lv = &out_lvs[i];
-      pcg_push_local_typed(p, lv->addr_slot, lv->ptr_ty);
-      pcg_load(p);
-      pcg_deref(p, lv->val_ty);
+      asm_out_lvalue_push(p, lv);
       pcg_swap(p);
       pcg_store(p);
       pcg_drop(p);
diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c
@@ -109,20 +109,36 @@ static int starts_asm_label(const Parser* p) {
   return is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM);
 }
 
-static void parse_asm_label(Parser* p) {
+static Sym parse_asm_label(Parser* p) {
+  Sym label = 0;
   advance(p); /* asm / __asm / __asm__ */
   expect_punct(p, '(', "'(' after asm label");
   if (p->cur.kind != TOK_STR) {
     perr(p, "expected string literal in asm label");
   }
+  /* Capture the label string for the declarator currently being parsed. For a
+   * `register T x __asm__("r10")` local this is the hard register name the
+   * variable binds to. Other asm labels (symbol renames) are still effectively
+   * ignored by callers that do not consume DeclaratorInfo.asm_label. */
+  {
+    Tok t = p->cur; /* decode from a copy; the parser position is advanced below */
+    size_t nlen = 0;
+    u8* bytes = decode_string_literal(p, &t, &nlen);
+    u32 ilen = (nlen > 0) ? (u32)(nlen - 1) : 0; /* presumably drops a trailing NUL counted in nlen - confirm */
+    label = kit_sym_intern(p->pool->c,
+                           (KitSlice){.s = (const char*)bytes, .len = ilen});
+    kit_compiler_context(p->c)->heap->free(kit_compiler_context(p->c)->heap,
+                                           bytes, 0); /* the decoded buffer is released once interned */
+  }
   do {
     advance(p);
   } while (p->cur.kind == TOK_STR); /* NOTE(review): adjacent literals are skipped, apparently not appended to label - confirm */
   expect_punct(p, ')', "')' after asm label");
+  return label; /* interned text decoded from the first string token */
 }
 
 static void parse_attrs_and_asm_into(Parser* p, Attr** attrs_out,
-                                     Attr** local_attrs) {
+                                     Attr** local_attrs, Sym* asm_label_out) {
   for (;;) {
     if (starts_attr(p)) {
       if (attrs_out)
@@ -132,7 +148,8 @@ static void parse_attrs_and_asm_into(Parser* p, Attr** attrs_out,
       continue;
     }
     if (starts_asm_label(p)) {
-      parse_asm_label(p);
+      Sym label = parse_asm_label(p);
+      if (asm_label_out) *asm_label_out = label;
       continue;
     }
     break;
@@ -146,7 +163,7 @@ static void parse_and_discard_attrs_or_asm(Parser* p) {
       continue;
     }
     if (starts_asm_label(p)) {
-      parse_asm_label(p);
+      (void)parse_asm_label(p);
       continue;
     }
     break;
@@ -1469,6 +1486,7 @@ const Type* parse_declarator_full_info(Parser* p, const Type* base,
                                        SrcLoc* loc_out, Attr** attrs_out,
                                        DeclaratorInfo* info_out) {
   Attr* local_attrs = NULL;
+  Sym asm_label = 0;
   base = parse_pointer_layer(p, base);
 
   Sym name = 0;
@@ -1594,7 +1612,7 @@ const Type* parse_declarator_full_info(Parser* p, const Type* base,
     }
   }
 
-  parse_attrs_and_asm_into(p, attrs_out, &local_attrs);
+  parse_attrs_and_asm_into(p, attrs_out, &local_attrs, &asm_label);
 
   DeclSuffix suffs[8];
   int nsuffs = 0;
@@ -1603,7 +1621,7 @@ const Type* parse_declarator_full_info(Parser* p, const Type* base,
   while (nsuffs < 8) {
     if (!parse_decl_suffix(p, &suffs[nsuffs])) break;
     ++nsuffs;
-    parse_attrs_and_asm_into(p, attrs_out, &local_attrs);
+    parse_attrs_and_asm_into(p, attrs_out, &local_attrs, &asm_label);
   }
   base = attrs_apply_type_mode(p, base, attrs_out ? *attrs_out : local_attrs);
   if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) {
@@ -1642,6 +1660,7 @@ const Type* parse_declarator_full_info(Parser* p, const Type* base,
     info_out->fn_nparams = final_fn_suff->nparams;
     info_out->fn_variadic = final_fn_suff->variadic;
   }
+  if (info_out) info_out->asm_label = asm_label;
   if (name_out) *name_out = name;
   if (loc_out) *loc_out = nloc;
   return base;
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -774,16 +774,18 @@ static void aa_emit_mem(AANativeTarget* a, int load, NativeLoc reg,
       mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_LD64_GOT_LO12_NC,
                         addr.base.global.sym, 0, 0, 0);
       if (addend) aa_emit_add_i64(a, scratch, scratch, addend);
-      aa_emit32(mc, load ? aa_ldur_v(sz, native_loc_is_fp(reg), rt, scratch, 0)
-                         : aa_stur_v(sz, native_loc_is_fp(reg), rt, scratch, 0));
+      aa_emit32(mc, load
+                        ? aa_ldur_v(sz, native_loc_is_fp(reg), rt, scratch, 0)
+                        : aa_stur_v(sz, native_loc_is_fp(reg), rt, scratch, 0));
       return;
     }
     aa_emit32(mc, aa64_adrp(scratch, 0, 0));
     mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_ADR_PREL_PG_HI21,
                       addr.base.global.sym, addend, 0, 0);
     pos = mc->pos(mc);
-    aa_emit32(mc, load ? aa_ldr_uimm_v(sz, native_loc_is_fp(reg), rt, scratch, 0)
-                       : aa_str_uimm_v(sz, native_loc_is_fp(reg), rt, scratch, 0));
+    aa_emit32(mc,
+              load ? aa_ldr_uimm_v(sz, native_loc_is_fp(reg), rt, scratch, 0)
+                   : aa_str_uimm_v(sz, native_loc_is_fp(reg), rt, scratch, 0));
     mc->emit_reloc_at(mc, mc->section_id, pos, aa_ldst_reloc_for_size(sz),
                       addr.base.global.sym, addend, 0, 0);
     return;
@@ -805,14 +807,16 @@ static void aa_emit_mem(AANativeTarget* a, int load, NativeLoc reg,
     } else {
       aa_panic(a, "unsupported memory address scale");
     }
-    aa_emit32(mc, aa_ldst_regoff_v(sz, native_loc_is_fp(reg), load, rt, use_base,
-                                   addr.index.reg, scaled));
+    aa_emit32(mc, aa_ldst_regoff_v(sz, native_loc_is_fp(reg), load, rt,
+                                   use_base, addr.index.reg, scaled));
     return;
   }
   if (off >= 0 && (((u32)off & ((1u << sz) - 1u)) == 0) &&
       ((u32)off >> sz) <= 0xfffu) {
-    aa_emit32(mc, load ? aa_ldr_uimm_v(sz, native_loc_is_fp(reg), rt, base, (u32)off)
-                       : aa_str_uimm_v(sz, native_loc_is_fp(reg), rt, base, (u32)off));
+    aa_emit32(
+        mc, load
+                ? aa_ldr_uimm_v(sz, native_loc_is_fp(reg), rt, base, (u32)off)
+                : aa_str_uimm_v(sz, native_loc_is_fp(reg), rt, base, (u32)off));
     return;
   }
   if (off >= -256 && off <= 255) {
@@ -962,7 +966,8 @@ static void aa_materialize_frame_index(AANativeTarget* a, NativeAddr* addr,
   addr->index.reg = reg;
 }
 
-static NativeLoc native_loc_reg(KitCgTypeId type, NativeAllocClass cls, Reg reg);
+static NativeLoc native_loc_reg(KitCgTypeId type, NativeAllocClass cls,
+                                Reg reg);
 
 static u32 aa_ldst_q_uimm(int load, u32 rt, u32 rn, u32 byte_off) {
   return aa64_ldst_uimm_pack((AA64LdStUimm){.size = 0,
@@ -2691,7 +2696,8 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
   if (plan->callee.kind == NATIVE_LOC_REG &&
       (NativeAllocClass)plan->callee.cls == NATIVE_REG_INT &&
       plan->callee.v.reg < 8u) {
-    NativeLoc scratch = native_loc_reg(plan->callee.type, NATIVE_REG_INT, AA_TMP0);
+    NativeLoc scratch =
+        native_loc_reg(plan->callee.type, NATIVE_REG_INT, AA_TMP0);
     aa_move(t, scratch, plan->callee);
     plan->callee = scratch;
   }
@@ -2730,15 +2736,15 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
       if (ai->kind == ABI_ARG_INDIRECT) {
         if (next_int < 8u) {
           AAArgMove* m = &moves[nmoves++];
-          m->dst = native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
-                              next_int++);
+          m->dst = native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64),
+                                  NATIVE_REG_INT, next_int++);
           m->src = desc->args[i];
           m->src_offset = 0;
           m->size = 8;
           m->is_addr = 1;
         } else {
           NativeLoc ptr = native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64),
-                                     NATIVE_REG_INT, AA_TMP0);
+                                         NATIVE_REG_INT, AA_TMP0);
           aa_addr_of_loc(t, ptr, desc->args[i]);
           aa_store_outgoing_part(t, tail_call, stack, ptr, 8);
           stack += 8u;
@@ -2752,15 +2758,16 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
         if ((cls == NATIVE_REG_FP && next_fp < 8u) ||
             (cls == NATIVE_REG_INT && next_int < 8u)) {
           AAArgMove* m = &moves[nmoves++];
-          m->dst = native_loc_reg(desc->args[i].type, cls,
-                              cls == NATIVE_REG_FP ? next_fp++ : next_int++);
+          m->dst =
+              native_loc_reg(desc->args[i].type, cls,
+                             cls == NATIVE_REG_FP ? next_fp++ : next_int++);
           m->src = desc->args[i];
           m->src_offset = part->src_offset;
           m->size = part->size;
           m->is_addr = 0;
         } else {
-          NativeLoc tmpreg = native_loc_reg(desc->args[i].type, cls,
-                                        cls == NATIVE_REG_FP ? 16u : AA_TMP0);
+          NativeLoc tmpreg = native_loc_reg(
+              desc->args[i].type, cls, cls == NATIVE_REG_FP ? 16u : AA_TMP0);
           aa_load_part(t, tmpreg, desc->args[i], part->src_offset, part->size);
           stack = align_up_u32(stack, aa_part_stack_align(part));
           aa_store_outgoing_part(t, tail_call, stack, tmpreg, part->size);
@@ -2792,11 +2799,12 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
       NativeAllocClass cls =
           part->cls == ABI_CLASS_FP ? NATIVE_REG_FP : NATIVE_REG_INT;
       KitCgTypeId pty = aa_part_scalar_type(part);
-      rets[nr].src = native_loc_reg(pty, cls, cls == NATIVE_REG_FP ? nf++ : ni++);
+      rets[nr].src =
+          native_loc_reg(pty, cls, cls == NATIVE_REG_FP ? nf++ : ni++);
       rets[nr].dst = desc->results[0];
       if (rets[nr].dst.kind == NATIVE_LOC_FRAME)
-        rets[nr].dst =
-            native_loc_stack(pty, desc->results[0].v.frame, (i32)part->src_offset);
+        rets[nr].dst = native_loc_stack(pty, desc->results[0].v.frame,
+                                        (i32)part->src_offset);
       else if (rets[nr].dst.kind == NATIVE_LOC_STACK) {
         rets[nr].dst.v.stack.offset += (i32)part->src_offset;
         rets[nr].dst.type = pty;
@@ -2935,7 +2943,8 @@ static void aa_plan_ret(NativeTarget* t, const CGFuncDesc* fd,
         rets[nr].src.v.addr.offset += (i32)part->src_offset;
         rets[nr].src.type = pty;
       }
-      rets[nr].dst = native_loc_reg(pty, cls, cls == NATIVE_REG_FP ? nf++ : ni++);
+      rets[nr].dst =
+          native_loc_reg(pty, cls, cls == NATIVE_REG_FP ? nf++ : ni++);
       rets[nr].mem = aa_mem_for_type(t, pty, part->size);
       nr++;
     }
@@ -3661,9 +3670,54 @@ static const NativeAllocClassInfo aa_classes[] = {
      .ret_mask = 0x0000000fu},
 };
 
+/* Resolve a register name ("x8", "v3", ...) to its (class, Reg). Powers the
+ * optimizer's inline-asm clobber masks and explicit hard-register operands
+ * ("{x8}" from a GNU local register variable). x0..x30 are DWARF 0..30; the
+ * SIMD/FP bank v0..v31 is DWARF 64..95. Returns non-zero for a non-register
+ * name (cc/memory/unknown), which the caller skips. */
+static int aa_resolve_name(const NativeRegInfo* ri, Slice name, Reg* out,
+                           NativeAllocClass* cls_out) {
+  char buf[16];
+  uint32_t dwarf;
+  (void)ri;
+  if (!name.s || !name.len || name.len >= sizeof buf) return 1; /* empty, or no room for the NUL in buf */
+  memcpy(buf, name.s, name.len); /* build a NUL-terminated copy for the lookup below */
+  buf[name.len] = '\0';
+  if (aa64_register_index(buf, &dwarf) != 0) return 1; /* lookup failed: not reported as a register */
+  if (dwarf <= 30u) { /* x0..x30: Reg is the DWARF number itself */
+    *cls_out = NATIVE_REG_INT;
+    *out = (Reg)dwarf;
+    return 0;
+  }
+  if (dwarf >= 64u && dwarf <= 95u) { /* v0..v31: rebased so Reg is 0..31 */
+    *cls_out = NATIVE_REG_FP;
+    *out = (Reg)(dwarf - 64u);
+    return 0;
+  }
+  return 1; /* any other DWARF number is rejected; *out and *cls_out stay unwritten */
+}
+
+static int aa_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls,
+                                 Reg reg) { /* 1 if (cls, reg) is on the allow-list below, else 0 */
+  (void)ri;
+  if (cls == NATIVE_REG_INT) {
+    if (reg <= 8u) return 1;                /* x0..x8 */
+    if (reg >= 12u && reg <= 15u) return 1; /* x12..x15 */
+    if (reg >= 19u && reg <= 28u) return 1; /* x19..x28 */
+    return 0; /* rejects x9..x11, x16..x18, x29, x30 - NOTE(review): rationale not visible here */
+  }
+  if (cls == NATIVE_REG_FP) {
+    if (reg <= 19u) return 1;               /* v0..v19 */
+    if (reg >= 22u && reg <= 31u) return 1; /* v22..v31; v20 and v21 fall through */
+  }
+  return 0; /* rejected FP register, or a class other than INT/FP */
+}
+
 static const NativeRegInfo aa_reg_info = {
     .classes = aa_classes,
     .nclasses = sizeof aa_classes / sizeof aa_classes[0],
+    .resolve_name = aa_resolve_name,
+    .asm_operand_reg_ok = aa_asm_operand_reg_ok,
 };
 
 static void aa_va_start_native(NativeTarget* t, NativeLoc ap_ptr);
@@ -3768,7 +3822,7 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
     AggregateAccess access;
     NativeLoc src =
         native_loc_reg(p->type, NATIVE_REG_INT,
-                   a->next_param_int < 8u ? a->next_param_int++ : AA_TMP0);
+                       a->next_param_int < 8u ? a->next_param_int++ : AA_TMP0);
     if (src.v.reg == AA_TMP0) {
       NativeAddr saddr;
       memset(&saddr, 0, sizeof saddr);
@@ -3826,14 +3880,14 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
       /* Unused parameter: only the ABI cursor advances. */
     } else if (to_reg) {
       NativeLoc d = native_loc_reg(dst.type ? dst.type : p->type,
-                               (NativeAllocClass)dst.cls, (Reg)dst.v.reg);
+                                   (NativeAllocClass)dst.cls, (Reg)dst.v.reg);
       if (!(src.kind == NATIVE_LOC_REG && src.v.reg == d.v.reg &&
             (NativeAllocClass)src.cls == (NativeAllocClass)d.cls))
         aa_move(t, d, src);
     } else {
-      aa_store_part(t,
-                    native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset),
-                    src, 0, part->size);
+      aa_store_part(
+          t, native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset), src,
+          0, part->size);
     }
   }
   a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u);
@@ -4143,7 +4197,7 @@ static void aa_va_arg_(NativeDirectTarget* d, Operand dst_op, Operand ap_addr,
   AANativeTarget* a = aa_of(d->native);
   int is_fp = cg_type_is_float(d->base.c, type);
   NativeLoc res = native_loc_reg(type, is_fp ? NATIVE_REG_FP : NATIVE_REG_INT,
-                             is_fp ? 16u : 9u);
+                                 is_fp ? 16u : 9u);
   MemAccess val_mem =
       aa_mem_for_type(d->native, type, type_size32(d->native, type));
   NativeAddr dst;
@@ -4302,6 +4356,17 @@ AA_UNUSED_FN static NativeAllocClass aa_asm_constraint_class(
   return NATIVE_REG_INT;
 }
 
+static int aa_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
+                                       const char* constraint,
+                                       NativeAsmRegPin* pin) {
+  NativeAsmRegPinStatus st =
+      native_asm_resolve_pin(d->native, reg, constraint, pin);
+  if (st == NATIVE_ASM_REG_PIN_ABSENT) return 0; /* no pin: caller falls back to its constraint-letter paths */
+  if (st != NATIVE_ASM_REG_PIN_OK)
+    aa_asm_panic(d, native_asm_pin_status_message(st)); /* assumes aa_asm_panic does not return - confirm */
+  return 1; /* callers read pin->cls and pin->reg after a 1 result */
+}
+
 AA_UNUSED_FN static void aa_direct_load_operand_to_reg(NativeDirectTarget* d,
                                                        Operand op,
                                                        NativeLoc dst) {
@@ -4444,21 +4509,26 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
 
   for (u32 i = 0; i < nout; ++i) {
     const char* body = native_asm_constraint_body(outs[i].str);
-    if (body[0] == 'r' || body[0] == 'w') {
+    NativeAsmRegPin pin;
+    if (aa_asm_resolve_pin_or_panic(d, outs[i].reg, outs[i].str, &pin)) {
+      /* GNU local register variable: pin to the named hard register. */
+      KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
+      if (pin.cls == NATIVE_REG_FP) {
+        used_fp |= 1u << pin.reg;
+        clob_fp |= 1u << pin.reg;
+      } else {
+        used_int |= 1u << pin.reg;
+        clob_int |= 1u << pin.reg;
+      }
+      aa_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg);
+    } else if (body[0] == 'r' || body[0] == 'w') {
       NativeAllocClass cls = aa_asm_constraint_class(d, body);
       Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp);
       KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
       aa_asm_bound_reg(&bound_outs[i], type, cls, reg);
-      if (outs[i].dir == KIT_CG_ASM_INOUT) {
-        NativeLoc loc = native_loc_reg(type, cls, reg);
-        aa_direct_load_operand_to_reg(d, out_ops[i], loc);
-      }
     } else if (body[0] == 'm') {
       Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
-      NativeLoc loc =
-          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
       KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
-      aa_direct_load_address_to_reg(d, out_ops[i], loc);
       aa_asm_bound_mem(&bound_outs[i], type, reg);
     } else {
       aa_asm_panic(d, "unsupported output constraint");
@@ -4476,31 +4546,32 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
       if (bound_outs[matched].kind != AA64_INLINE_OPK_REG)
         aa_asm_panic(d, "matching constraint requires register output");
       bound_ins[i] = bound_outs[matched];
-      aa_direct_load_operand_to_reg(
-          d, in_ops[i],
-          native_loc_reg(bound_ins[i].type,
-                     bound_ins[i].pad[0] == AA64_INLINE_OPCLS_FP
-                         ? NATIVE_REG_FP
-                         : NATIVE_REG_INT,
-                     (Reg)bound_ins[i].v.local));
       continue;
     }
-    if (body[0] == 'r' || body[0] == 'w') {
+    NativeAsmRegPin pin;
+    if (aa_asm_resolve_pin_or_panic(d, ins[i].reg, ins[i].str, &pin)) {
+      /* GNU local register variable: pin to the named hard register. */
+      KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
+      if (pin.cls == NATIVE_REG_FP) {
+        used_fp |= 1u << pin.reg;
+        clob_fp |= 1u << pin.reg;
+      } else {
+        used_int |= 1u << pin.reg;
+        clob_int |= 1u << pin.reg;
+      }
+      aa_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg);
+    } else if (body[0] == 'r' || body[0] == 'w') {
       NativeAllocClass cls = aa_asm_constraint_class(d, body);
       Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp);
       KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
       aa_asm_bound_reg(&bound_ins[i], type, cls, reg);
-      aa_direct_load_operand_to_reg(d, in_ops[i], native_loc_reg(type, cls, reg));
     } else if (body[0] == 'i') {
       if (in_ops[i].kind != OPK_IMM)
         aa_asm_panic(d, "immediate constraint requires immediate operand");
       bound_ins[i] = in_ops[i];
     } else if (body[0] == 'm') {
       Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
-      NativeLoc loc =
-          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
       KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
-      aa_direct_load_address_to_reg(d, in_ops[i], loc);
       aa_asm_bound_mem(&bound_ins[i], type, reg);
     } else {
       aa_asm_panic(d, "unsupported input constraint");
@@ -4509,6 +4580,39 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
 
   saved =
       aa_asm_save_callee_clobbers(aa_of(d->native), clob_int, clob_fp, &nsaved);
+  for (u32 i = 0; i < nout; ++i) {
+    if (bound_outs[i].kind == AA64_INLINE_OPK_REG) {
+      NativeAllocClass cls = bound_outs[i].pad[0] == AA64_INLINE_OPCLS_FP
+                                 ? NATIVE_REG_FP
+                                 : NATIVE_REG_INT;
+      if (outs[i].dir == KIT_CG_ASM_INOUT) {
+        aa_direct_load_operand_to_reg(
+            d, out_ops[i],
+            native_loc_reg(bound_outs[i].type, cls,
+                           (Reg)bound_outs[i].v.local));
+      }
+    } else if (bound_outs[i].kind == OPK_INDIRECT) {
+      NativeLoc loc =
+          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
+                         (Reg)bound_outs[i].v.ind.base);
+      aa_direct_load_address_to_reg(d, out_ops[i], loc);
+    }
+  }
+  for (u32 i = 0; i < nin; ++i) {
+    if (bound_ins[i].kind == AA64_INLINE_OPK_REG) {
+      NativeAllocClass cls = bound_ins[i].pad[0] == AA64_INLINE_OPCLS_FP
+                                 ? NATIVE_REG_FP
+                                 : NATIVE_REG_INT;
+      aa_direct_load_operand_to_reg(
+          d, in_ops[i],
+          native_loc_reg(bound_ins[i].type, cls, (Reg)bound_ins[i].v.local));
+    } else if (bound_ins[i].kind == OPK_INDIRECT) {
+      NativeLoc loc =
+          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
+                         (Reg)bound_ins[i].v.ind.base);
+      aa_direct_load_address_to_reg(d, in_ops[i], loc);
+    }
+  }
   a = aa64_asm_open(d->base.c);
   aa64_inline_bind(a, outs, nout, bound_outs, ins, nin, bound_ins, clobbers,
                    nclob);
diff --git a/src/arch/c_target/c_emit.c b/src/arch/c_target/c_emit.c
@@ -3224,12 +3224,92 @@ static void c_emit_c_string_literal(CBuf* b, const char* s) {
   cbuf_putc(b, '"');
 }
 
+/* Write the register-temporary name for pinned asm operand `idx` into `out`:
+ * "__kit_ao" (outputs) or "__kit_ai" (inputs) followed by idx in decimal. The
+ * result is truncated to `cap` bytes and NUL-terminated; cap == 0 writes nothing. */
+static void c_asm_reg_temp_name(char* out, size_t cap, int is_out, u32 idx) {
+  const char* pfx = is_out ? "__kit_ao" : "__kit_ai";
+  char digits[16]; /* decimal digits of idx, least significant first */
+  size_t ndigits = 0;
+  size_t len = 0;
+  if (!cap) return; /* no room even for the terminator */
+  do {
+    digits[ndigits++] = (char)('0' + idx % 10);
+    idx /= 10;
+  } while (idx);
+  for (; *pfx && len + 1 < cap; ++pfx) out[len++] = *pfx;
+  while (ndigits && len + 1 < cap) out[len++] = digits[--ndigits];
+  out[len] = '\0';
+}
+
+/* Write an asm output operand as a C expression usable as lvalue and rvalue:
+ * the local's name for OPK_LOCAL, else a dereferenced address. */
+static void c_emit_asm_out_lvalue(CTarget* t, Operand op) {
+  char local_name[24];
+  if (op.kind != OPK_LOCAL) {
+    c_emit_addr_deref(t, op, op.type);
+    return;
+  }
+  c_ensure_local(t, op.v.local, op.type);
+  c_local_name(op.v.local, local_name, sizeof local_name);
+  cbuf_puts(&t->body, local_name);
+}
+
 void c_emit_asm_block(CTarget* t, const char* tmpl, const AsmConstraint* outs,
                       u32 no, Operand* oo, const AsmConstraint* ins, u32 ni,
                       const Operand* io, const Sym* clobs, u32 nc) {
+  char nm[24];
   for (u32 i = 0; i < no; ++i) c_assert_no_index(t, oo[i], "asm_block out");
   for (u32 i = 0; i < ni; ++i) c_assert_no_index(t, io[i], "asm_block in");
-  cbuf_puts(&t->body, "  __asm__ __volatile__ (");
+
+  /* GNU local register variables (AsmConstraint.reg): a target backend resolves
+   * the pin to a physical register, but the portable C backend has no register
+   * names to bind — so re-emit each pinned operand as a faithful
+   * `register T v __asm__("reg")` temporary (scoped in a block) and let the
+   * host compiler honor the binding. Dormant unless a frontend marks an
+   * operand; only the C frontend does, for register variables. */
+  int any_pin = 0;
+  for (u32 i = 0; i < no; ++i)
+    if (outs[i].reg) any_pin = 1;
+  for (u32 i = 0; i < ni; ++i)
+    if (ins[i].reg) any_pin = 1;
+
+  if (any_pin) {
+    cbuf_puts(&t->body, "  {\n");
+    for (u32 i = 0; i < ni; ++i) {
+      if (!ins[i].reg) continue;
+      c_asm_reg_temp_name(nm, sizeof nm, 0, i);
+      cbuf_puts(&t->body, "    register ");
+      c_emit_type(t, &t->body, io[i].type);
+      cbuf_puts(&t->body, " ");
+      cbuf_puts(&t->body, nm);
+      cbuf_puts(&t->body, " __asm__(");
+      c_emit_c_string_literal(&t->body, pool_slice(t->c->global, ins[i].reg).s);
+      cbuf_puts(&t->body, ") = ");
+      c_emit_operand(t, io[i]);
+      cbuf_puts(&t->body, ";\n");
+    }
+    for (u32 i = 0; i < no; ++i) {
+      if (!outs[i].reg) continue;
+      c_asm_reg_temp_name(nm, sizeof nm, 1, i);
+      cbuf_puts(&t->body, "    register ");
+      c_emit_type(t, &t->body, oo[i].type);
+      cbuf_puts(&t->body, " ");
+      cbuf_puts(&t->body, nm);
+      cbuf_puts(&t->body, " __asm__(");
+      c_emit_c_string_literal(&t->body,
+                              pool_slice(t->c->global, outs[i].reg).s);
+      cbuf_puts(&t->body, ")");
+      if (outs[i].dir == KIT_CG_ASM_INOUT) {
+        cbuf_puts(&t->body, " = ");
+        c_emit_asm_out_lvalue(t, oo[i]);
+      }
+      cbuf_puts(&t->body, ";\n");
+    }
+  }
+
+  cbuf_puts(&t->body, any_pin ? "    __asm__ __volatile__ ("
+                              : "  __asm__ __volatile__ (");
   c_emit_c_string_literal(&t->body, tmpl ? tmpl : "");
   /* Outputs. */
   cbuf_puts(&t->body, " : ");
@@ -3243,14 +3323,13 @@ void c_emit_asm_block(CTarget* t, const char* tmpl, const AsmConstraint* outs,
     c_emit_c_string_literal(&t->body, outs[i].str ? outs[i].str : "");
     cbuf_puts(&t->body, "(");
     /* Outputs must be an lvalue. OPK_LOCAL is a plain C local; this
-     * works directly. OPK_LOCAL / OPK_INDIRECT also produce lvalues. */
-    if (oo[i].kind == OPK_LOCAL) {
-      c_ensure_local(t, oo[i].v.local, oo[i].type);
-      char rb[24];
-      c_local_name(oo[i].v.local, rb, sizeof rb);
-      cbuf_puts(&t->body, rb);
+     * works directly. OPK_LOCAL / OPK_INDIRECT also produce lvalues. A pinned
+     * output names its register temporary instead. */
+    if (outs[i].reg) {
+      c_asm_reg_temp_name(nm, sizeof nm, 1, i);
+      cbuf_puts(&t->body, nm);
     } else {
-      c_emit_addr_deref(t, oo[i], oo[i].type);
+      c_emit_asm_out_lvalue(t, oo[i]);
     }
     cbuf_puts(&t->body, ")");
   }
@@ -3275,7 +3354,12 @@ void c_emit_asm_block(CTarget* t, const char* tmpl, const AsmConstraint* outs,
     }
     c_emit_c_string_literal(&t->body, cs);
     cbuf_puts(&t->body, "(");
-    c_emit_operand(t, io[i]);
+    if (ins[i].reg) {
+      c_asm_reg_temp_name(nm, sizeof nm, 0, i);
+      cbuf_puts(&t->body, nm);
+    } else {
+      c_emit_operand(t, io[i]);
+    }
     cbuf_puts(&t->body, ")");
   }
   /* Clobbers. */
@@ -3285,6 +3369,19 @@ void c_emit_asm_block(CTarget* t, const char* tmpl, const AsmConstraint* outs,
     c_emit_c_string_literal(&t->body, pool_slice(t->c->global, clobs[i]).s);
   }
   cbuf_puts(&t->body, ");\n");
+
+  if (any_pin) {
+    for (u32 i = 0; i < no; ++i) {
+      if (!outs[i].reg) continue;
+      c_asm_reg_temp_name(nm, sizeof nm, 1, i);
+      cbuf_puts(&t->body, "    ");
+      c_emit_asm_out_lvalue(t, oo[i]);
+      cbuf_puts(&t->body, " = ");
+      cbuf_puts(&t->body, nm);
+      cbuf_puts(&t->body, ";\n");
+    }
+    cbuf_puts(&t->body, "  }\n");
+  }
 }
 
 /* === load_const ===
diff --git a/src/arch/native_target.h b/src/arch/native_target.h
@@ -7,6 +7,7 @@
 #include "cg/cgtarget.h"
 #include "cg/type.h"
 #include "core/core.h"
+#include "core/slice.h" /* Slice, for resolve_name */
 
 /* NativeTarget is the physical native-emission contract. It is driven after
  * semantic CG has been either direct-lowered by NativeDirectTarget or recorded,
@@ -149,8 +150,18 @@ struct NativeRegInfo {
   const NativeAllocClassInfo* classes;
   u32 nclasses;
 
-  int (*resolve_name)(const NativeRegInfo*, Sym name, Reg* out,
+  /* Map a register name to its (Reg, class). `name` is the raw spelling
+   * ("rax", "x8", "a7"); the caller resolves any Sym to its bytes first so this
+   * stays pool-free. Returns 0 on success, non-zero for a non-register name. */
+  int (*resolve_name)(const NativeRegInfo*, Slice name, Reg* out,
                       NativeAllocClass* cls_out);
+  /* True when (cls, reg) is a valid hard-register home for an inline-asm value
+   * operand. This is intentionally separate from allocator availability:
+   * syscall idioms need ABI registers such as x8/a7, while stack/frame, zero,
+   * link, platform, and backend scratch registers must stay unavailable even if
+   * the assembler can name them. */
+  int (*asm_operand_reg_ok)(const NativeRegInfo*, NativeAllocClass cls,
+                            Reg reg);
   const char* (*debug_name)(const NativeRegInfo*, NativeAllocClass, Reg);
   u32 (*dwarf_reg)(const NativeRegInfo*, NativeAllocClass, Reg);
 };
diff --git a/src/arch/rv64/native.c b/src/arch/rv64/native.c
@@ -459,9 +459,52 @@ static const NativeAllocClassInfo rv_classes[] = {
      .reserved_mask = 0x0000000fu /* ft0-ft3 */},
 };
 
+/* Translate a RISC-V register spelling ("a7", "fa0", ...) into its allocation
+ * class and Reg. Feeds the optimizer's inline-asm clobber masks and explicit
+ * hard-register operands ("{a7}" from a GNU local register variable).
+ * rv64_register_index reports a DWARF number: 0..31 is x0..x31 and 32..63 is
+ * the FP bank f0..f31. Returns 0 on success and non-zero for a non-register
+ * name (cc/memory/unknown), which the caller skips. */
+static int rv_resolve_name(const NativeRegInfo* ri, Slice name, Reg* out,
+                           NativeAllocClass* cls_out) {
+  char spelling[16]; /* NUL-terminated copy passed to rv64_register_index */
+  uint32_t regno;
+  (void)ri;
+  if (!name.s || !name.len || name.len >= sizeof spelling) return 1;
+  memcpy(spelling, name.s, name.len);
+  spelling[name.len] = '\0';
+  if (rv64_register_index(spelling, &regno) != 0) return 1;
+  if (regno > 63u) return 1; /* outside both register banks */
+  /* The FP bank is rebased so that DWARF 32 becomes Reg 0 of NATIVE_REG_FP. */
+  if (regno >= 32u) {
+    *cls_out = NATIVE_REG_FP;
+    *out = (Reg)(regno - 32u);
+  } else {
+    *cls_out = NATIVE_REG_INT;
+    *out = (Reg)regno;
+  }
+  return 0;
+}
+
+/* True when (cls, reg) may hold a pinned inline-asm operand on rv64: integer
+ * x9..x27 (s1, a0..a7, s2..s11) or x31 (t6); FP f4..f31. */
+static int rv_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls,
+                                 Reg reg) {
+  int ok = 0;
+  (void)ri;
+  if (cls == NATIVE_REG_INT) {
+    ok = (reg >= 9u && reg <= 27u) || reg == 31u;
+  } else if (cls == NATIVE_REG_FP) {
+    ok = reg >= 4u && reg <= 31u;
+  }
+  return ok;
+}
+
 static const NativeRegInfo rv_reg_info = {
     .classes = rv_classes,
     .nclasses = sizeof rv_classes / sizeof rv_classes[0],
+    .resolve_name = rv_resolve_name, /* register name -> (Reg, class) */
+    .asm_operand_reg_ok = rv_asm_operand_reg_ok, /* asm pin legality */
 };
 
 /* ============================ legality ============================ */
@@ -1714,8 +1757,10 @@ static const ABIArgInfo* rv_param_abi(NativeTarget* t, const ABIFuncInfo* abi,
       (!variadic && cg_type_is_float(t->c, desc->args[i].type)) ? ABI_CLASS_FP
                                                                 : ABI_CLASS_INT;
   ((ABIArgPart*)scratch->parts)[0].loc = ABI_LOC_REG;
-  ((ABIArgPart*)scratch->parts)[0].size = native_type_size(t, desc->args[i].type);
-  ((ABIArgPart*)scratch->parts)[0].align = native_type_align(t, desc->args[i].type);
+  ((ABIArgPart*)scratch->parts)[0].size =
+      native_type_size(t, desc->args[i].type);
+  ((ABIArgPart*)scratch->parts)[0].align =
+      native_type_align(t, desc->args[i].type);
   return scratch;
 }
 
@@ -1981,14 +2026,14 @@ static void rv_bind_native_param(NativeTarget* t, const CGParamDesc* p,
       /* unused parameter; cursors already advanced */
     } else if (to_reg) {
       NativeLoc d = native_loc_reg(dst.type ? dst.type : p->type,
-                               (NativeAllocClass)dst.cls, (Reg)dst.v.reg);
+                                   (NativeAllocClass)dst.cls, (Reg)dst.v.reg);
       if (!(src.kind == NATIVE_LOC_REG && loc_reg(src) == loc_reg(d) &&
             (NativeAllocClass)src.cls == (NativeAllocClass)d.cls))
         rv_move(t, d, src);
     } else {
-      rv_store_part(t,
-                    native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset),
-                    src, 0, part->size);
+      rv_store_part(
+          t, native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset), src,
+          0, part->size);
     }
   }
   a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u);
@@ -2040,7 +2085,8 @@ static void rv_plan_call(NativeTarget* t, const NativeCallDesc* desc,
   if (plan->callee.kind == NATIVE_LOC_REG &&
       (NativeAllocClass)plan->callee.cls == NATIVE_REG_INT &&
       plan->callee.v.reg >= RV_A0 && plan->callee.v.reg <= RV_A7) {
-    NativeLoc scratch = native_loc_reg(plan->callee.type, NATIVE_REG_INT, RV_TMP0);
+    NativeLoc scratch =
+        native_loc_reg(plan->callee.type, NATIVE_REG_INT, RV_TMP0);
     rv_move(t, scratch, plan->callee);
     plan->callee = scratch;
   }
@@ -2133,8 +2179,8 @@ static void rv_plan_call(NativeTarget* t, const NativeCallDesc* desc,
       rets[nr].src = native_loc_reg(pty, cls, rreg);
       rets[nr].dst = desc->results[0];
       if (rets[nr].dst.kind == NATIVE_LOC_FRAME)
-        rets[nr].dst =
-            native_loc_stack(pty, desc->results[0].v.frame, (i32)part->src_offset);
+        rets[nr].dst = native_loc_stack(pty, desc->results[0].v.frame,
+                                        (i32)part->src_offset);
       else if (rets[nr].dst.kind == NATIVE_LOC_STACK) {
         rets[nr].dst.v.stack.offset += (i32)part->src_offset;
         rets[nr].dst.type = pty;
@@ -2423,7 +2469,8 @@ static void rv_atomic_load(NativeTarget* t, NativeLoc dst, NativeAddr addr,
                            MemAccess mem, KitCgMemOrder mo) {
   RvNativeTarget* a = rv_of(t);
   MCEmitter* mc = t->mc;
-  u32 sf = (mem.size ? mem.size : native_type_size(t, dst.type)) == 8u ? 1u : 0u;
+  u32 sf =
+      (mem.size ? mem.size : native_type_size(t, dst.type)) == 8u ? 1u : 0u;
   u32 base = rv_atomic_addr_reg(a, addr);
   if (mo == KIT_CG_MO_SEQ_CST) rv64_emit32(mc, rv_fence_rw_rw());
   if (rv_order_acquire(mo)) {
@@ -2431,9 +2478,9 @@ static void rv_atomic_load(NativeTarget* t, NativeLoc dst, NativeAddr addr,
     rv64_emit32(mc, sf ? rv_lr_d(loc_reg(dst), base, 1, 0)
                        : rv_lr_w(loc_reg(dst), base, 1, 0));
   } else {
-    rv64_emit32(mc,
-                enc_int_load(mem.size ? mem.size : native_type_size(t, dst.type), 0,
-                             loc_reg(dst), base, 0));
+    rv64_emit32(
+        mc, enc_int_load(mem.size ? mem.size : native_type_size(t, dst.type), 0,
+                         loc_reg(dst), base, 0));
   }
 }
 
@@ -2454,7 +2501,8 @@ static void rv_atomic_rmw(NativeTarget* t, KitCgAtomicOp op, NativeLoc dst,
                           KitCgMemOrder mo) {
   RvNativeTarget* a = rv_of(t);
   MCEmitter* mc = t->mc;
-  u32 sf = (mem.size ? mem.size : native_type_size(t, dst.type)) == 8u ? 1u : 0u;
+  u32 sf =
+      (mem.size ? mem.size : native_type_size(t, dst.type)) == 8u ? 1u : 0u;
   u32 base = rv_atomic_addr_reg(a, addr); /* RV_TMP0 */
   u32 vreg = loc_reg(val);
   u32 rd = loc_reg(dst);
@@ -2505,7 +2553,8 @@ static void rv_atomic_cas(NativeTarget* t, NativeLoc prior, NativeLoc ok,
                           KitCgMemOrder success, KitCgMemOrder failure) {
   RvNativeTarget* a = rv_of(t);
   MCEmitter* mc = t->mc;
-  u32 sf = (mem.size ? mem.size : native_type_size(t, prior.type)) == 8u ? 1u : 0u;
+  u32 sf =
+      (mem.size ? mem.size : native_type_size(t, prior.type)) == 8u ? 1u : 0u;
   u32 base = rv_atomic_addr_reg(a, addr); /* RV_TMP0 */
   u32 rprior = loc_reg(prior);
   u32 rexp = loc_reg(expected);
@@ -3112,6 +3161,17 @@ static NativeAllocClass rv_asm_constraint_class(NativeDirectTarget* d,
   return NATIVE_REG_INT;
 }
 
+/* 0 when the pin is absent, else 1; a failed pin goes to rv_asm_panic. */
+static int rv_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
+                                       const char* constraint,
+                                       NativeAsmRegPin* pin) {
+  NativeAsmRegPinStatus status =
+      native_asm_resolve_pin(d->native, reg, constraint, pin);
+  if (status != NATIVE_ASM_REG_PIN_ABSENT && status != NATIVE_ASM_REG_PIN_OK)
+    rv_asm_panic(d, native_asm_pin_status_message(status)); /* rejected pin */
+  return status != NATIVE_ASM_REG_PIN_ABSENT;
+}
+
 /* Pick a free register from the arch's caller-saved allocable pools for an
  * asm operand the direct path must self-allocate. */
 static Reg rv_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls,
@@ -3620,13 +3680,14 @@ static void rv_va_arg_(NativeDirectTarget* d, Operand dst, Operand ap_addr,
   RvNativeTarget* a = rv_of(d->native);
   int is_fp = cg_type_is_float(d->base.c, type);
   NativeLoc res = native_loc_reg(type, is_fp ? NATIVE_REG_FP : NATIVE_REG_INT,
-                             is_fp ? RV_FTMP0 : RV_TMP0);
+                                 is_fp ? RV_FTMP0 : RV_TMP0);
   NativeAddr dst_addr;
   rv_va_arg_core(a, res, rv_direct_va_base(d, ap_addr, RV_TMP3), type);
   /* Store the fetched value back into the semantic destination. */
   dst_addr = rv_direct_addr(d, dst);
   if (dst_addr.base_kind == NATIVE_ADDR_BASE_FRAME_VALUE) {
-    NativeLoc base = native_loc_reg(dst_addr.base_type, NATIVE_REG_INT, RV_TMP1);
+    NativeLoc base =
+        native_loc_reg(dst_addr.base_type, NATIVE_REG_INT, RV_TMP1);
     NativeAddr load;
     memset(&load, 0, sizeof load);
     load.base_kind = NATIVE_ADDR_BASE_FRAME;
@@ -3637,8 +3698,9 @@ static void rv_va_arg_(NativeDirectTarget* d, Operand dst, Operand ap_addr,
     dst_addr.base_kind = NATIVE_ADDR_BASE_REG;
     dst_addr.base.reg = RV_TMP1;
   }
-  rv_emit_mem(a, 0, res, dst_addr,
-              native_mem_for_type(d->native, type, native_type_size(d->native, type)));
+  rv_emit_mem(
+      a, 0, res, dst_addr,
+      native_mem_for_type(d->native, type, native_type_size(d->native, type)));
 }
 static void rv_va_end_(NativeDirectTarget* d, Operand ap_addr) {
   (void)d;
@@ -3681,18 +3743,23 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
   for (i = 0; i < nout; ++i) {
     const char* body = native_asm_constraint_body(outs[i].str);
     KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
-    if (body[0] == 'r' || body[0] == 'f') {
+    NativeAsmRegPin pin;
+    if (rv_asm_resolve_pin_or_panic(d, outs[i].reg, outs[i].str, &pin)) {
+      /* GNU local register variable: pin to the named hard register. */
+      if (pin.cls == NATIVE_REG_FP) {
+        used_fp |= 1u << pin.reg;
+        clob_fp |= 1u << pin.reg;
+      } else {
+        used_int |= 1u << pin.reg;
+        clob_int |= 1u << pin.reg;
+      }
+      rv_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg);
+    } else if (body[0] == 'r' || body[0] == 'f') {
       NativeAllocClass cls = rv_asm_constraint_class(d, body);
       Reg reg = rv_asm_alloc_reg(d, cls, &used_int, &used_fp);
       rv_asm_bound_reg(&bound_outs[i], type, cls, reg);
-      if (outs[i].dir == KIT_CG_ASM_INOUT)
-        rv_direct_load_operand_to_reg(d, out_ops[i],
-                                      native_loc_reg(type, cls, reg));
     } else if (body[0] == 'm') {
       Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
-      NativeLoc lloc =
-          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
-      rv_direct_load_address_to_reg(d, out_ops[i], lloc);
       rv_asm_bound_mem(&bound_outs[i], type, reg);
     } else {
       rv_asm_panic(d, "unsupported output constraint");
@@ -3711,29 +3778,29 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
       if (bound_outs[matched].kind != RV64_INLINE_OPK_REG)
         rv_asm_panic(d, "matching constraint requires register output");
       bound_ins[i] = bound_outs[matched];
-      rv_direct_load_operand_to_reg(
-          d, in_ops[i],
-          native_loc_reg(bound_ins[i].type,
-                     bound_ins[i].pad[0] == RV64_INLINE_OPCLS_FP
-                         ? NATIVE_REG_FP
-                         : NATIVE_REG_INT,
-                     (Reg)bound_ins[i].v.local));
       continue;
     }
-    if (body[0] == 'r' || body[0] == 'f') {
+    NativeAsmRegPin pin;
+    if (rv_asm_resolve_pin_or_panic(d, ins[i].reg, ins[i].str, &pin)) {
+      /* GNU local register variable: pin to the named hard register. */
+      if (pin.cls == NATIVE_REG_FP) {
+        used_fp |= 1u << pin.reg;
+        clob_fp |= 1u << pin.reg;
+      } else {
+        used_int |= 1u << pin.reg;
+        clob_int |= 1u << pin.reg;
+      }
+      rv_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg);
+    } else if (body[0] == 'r' || body[0] == 'f') {
       NativeAllocClass cls = rv_asm_constraint_class(d, body);
       Reg reg = rv_asm_alloc_reg(d, cls, &used_int, &used_fp);
       rv_asm_bound_reg(&bound_ins[i], type, cls, reg);
-      rv_direct_load_operand_to_reg(d, in_ops[i], native_loc_reg(type, cls, reg));
     } else if (body[0] == 'i') {
       if (in_ops[i].kind != OPK_IMM)
         rv_asm_panic(d, "immediate constraint requires immediate operand");
       bound_ins[i] = in_ops[i];
     } else if (body[0] == 'm') {
       Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
-      NativeLoc lloc =
-          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
-      rv_direct_load_address_to_reg(d, in_ops[i], lloc);
       rv_asm_bound_mem(&bound_ins[i], type, reg);
     } else {
       rv_asm_panic(d, "unsupported input constraint");
@@ -3741,6 +3808,39 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
   }
 
   saved = rv_asm_save_callee_clobbers(a, clob_int, clob_fp, &nsaved);
+  for (i = 0; i < nout; ++i) {
+    if (bound_outs[i].kind == RV64_INLINE_OPK_REG) {
+      NativeAllocClass cls = bound_outs[i].pad[0] == RV64_INLINE_OPCLS_FP
+                                 ? NATIVE_REG_FP
+                                 : NATIVE_REG_INT;
+      if (outs[i].dir == KIT_CG_ASM_INOUT) {
+        rv_direct_load_operand_to_reg(
+            d, out_ops[i],
+            native_loc_reg(bound_outs[i].type, cls,
+                           (Reg)bound_outs[i].v.local));
+      }
+    } else if (bound_outs[i].kind == OPK_INDIRECT) {
+      NativeLoc loc =
+          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
+                         (Reg)bound_outs[i].v.ind.base);
+      rv_direct_load_address_to_reg(d, out_ops[i], loc);
+    }
+  }
+  for (i = 0; i < nin; ++i) {
+    if (bound_ins[i].kind == RV64_INLINE_OPK_REG) {
+      NativeAllocClass cls = bound_ins[i].pad[0] == RV64_INLINE_OPCLS_FP
+                                 ? NATIVE_REG_FP
+                                 : NATIVE_REG_INT;
+      rv_direct_load_operand_to_reg(
+          d, in_ops[i],
+          native_loc_reg(bound_ins[i].type, cls, (Reg)bound_ins[i].v.local));
+    } else if (bound_ins[i].kind == OPK_INDIRECT) {
+      NativeLoc loc =
+          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
+                         (Reg)bound_ins[i].v.ind.base);
+      rv_direct_load_address_to_reg(d, in_ops[i], loc);
+    }
+  }
   asmh = rv64_asm_open(c);
   rv64_inline_bind(asmh, outs, nout, bound_outs, ins, nin, bound_ins, clobbers,
                    nclob);
diff --git a/src/arch/wasm/emit.c b/src/arch/wasm/emit.c
@@ -1858,6 +1858,14 @@ void wasm_asm_block(CGTarget* tg, const char* tmpl, const AsmConstraint* outs,
     if (clob[i] != sym_memory)
       wfail_at(t, loc, "wasm target: asm register clobbers not yet supported");
   }
+  for (i = 0; i < nout; ++i) {
+    if (outs[i].reg)
+      wfail_at(t, loc, "wasm target: asm hard-register operands not supported");
+  }
+  for (i = 0; i < nin; ++i) {
+    if (ins[i].reg)
+      wfail_at(t, loc, "wasm target: asm hard-register operands not supported");
+  }
 
   /* Build a scratch WasmFunc with the synthetic signature. Layout is:
    *   params  = input types (indices 0 .. nin-1)
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -1,8 +1,7 @@
 /* arch/x64/emit.c — byte-level emit helpers, function prologue/epilogue.
  *
  * Covers: REX, ModR/M, SIB, all emit_* primitives, x_func_begin,
- * x_func_end, and the shared constant tables (g_int_order, g_fp_order,
- * per-ABI int_args tables exposed via X64ABIRegs). */
+ * x_func_end, and the per-ABI int_args tables exposed via X64ABIRegs. */
 
 #include "arch/x64/emit.h"
 
@@ -16,18 +15,6 @@
 /* ============================================================
  * Shared constant tables. */
 
-const Reg g_int_order[6] = {
-    X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */
-    X64_R10,                                     /* caller-saved tail */
-};
-
-const Reg g_fp_order[10] = {
-    /* All xmm regs are caller-saved on SysV; preference order is xmm6
-     * upward to keep the low arg/return regs (xmm0..5) clear for calls. */
-    X64_XMM6,      X64_XMM7,      X64_XMM8,      X64_XMM0 + 9,  X64_XMM0 + 10,
-    X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15,
-};
-
 static const u32 g_int_arg_regs_sysv[6] = {X64_RDI, X64_RSI, X64_RDX,
                                            X64_RCX, X64_R8,  X64_R9};
 static const u32 g_int_arg_regs_win64[4] = {X64_RCX, X64_RDX, X64_R8, X64_R9};
diff --git a/src/arch/x64/emit.h b/src/arch/x64/emit.h
@@ -42,9 +42,6 @@ typedef struct X64ABIRegs {
 
 const X64ABIRegs* x64_abi_for_os(KitOSKind os);
 
-extern const Reg g_int_order[6];
-extern const Reg g_fp_order[10];
-
 /* Per-instruction debug line rows. Declared here (mc.h only forward-declares
  * Debug) so emit.c's encoders and native.c's lifecycle can both record rows
  * without taking a full dependency on debug/debug.h. */
diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c
@@ -14,11 +14,14 @@
  * The single-pass (-O0) prologue reserves a NOP placeholder patched in func_end
  * once max_outgoing and callee-saves are known.
  *
- * Register model. INT scratch (never allocable, never driver scratch): RAX and
- * R11 — the legacy emit paths' fixed temporaries. FP scratch: XMM14 and XMM15.
- * RSP/RBP are reserved (stack/frame pointers). Everything else is allocable.
- * The driver scratch pool is RBX/R12 (int) and XMM12/XMM13 (fp), disjoint from
- * the emit temps so a hook never clobbers an operand parked there. ABI arg/ret
+ * Register model. INT scratch (never allocable, never driver scratch): R10 and
+ * R11 — the emit paths' fixed temporaries. FP scratch: XMM14 and XMM15. RSP/RBP
+ * are reserved (stack/frame pointers). RAX is reserved too (return value, the
+ * div/mul implicit operand), but it is NOT an emit temp, so inline asm may pin
+ * an operand to it (the Linux syscall idiom) — see x64_asm_operand_reg_ok.
+ * Everything else is allocable. The driver scratch pool is RBX/R12 (int) and
+ * XMM12/XMM13 (fp), disjoint from the emit temps so a hook never clobbers an
+ * operand parked there. ABI arg/ret
  * registers are caller-saved-allocable; callee-saved set is resolved per-OS via
  * x64_abi_for_os at runtime (the legality masks below are SysV's, the conserva-
  * tive superset that both ABIs' allocators respect — Win64's extra callee-saves
@@ -46,7 +49,7 @@
 #include "obj/obj.h"
 
 enum {
-  X64_TMP_INT = X64_RAX,      /* emit-internal int scratch (reserved) */
+  X64_TMP_INT = X64_R10,      /* emit-internal int scratch (reserved) */
   X64_TMP_INT2 = X64_R11,     /* emit-internal int scratch (reserved) */
   X64_TMP_FP = X64_XMM0 + 14, /* emit-internal fp scratch (reserved) */
   X64_TMP_FP2 = X64_XMM15,    /* emit-internal fp scratch (reserved) */
@@ -206,14 +209,15 @@ static void emit_jcc_rel32(MCEmitter* mc, u32 cc, MCLabel l);
    .spill_cost = 0u,             \
    .copy_cost = 0u}
 
-/* Allocable int pool, opt's spill/reload set: caller-saved callee-saves first
- * so -O0's local cache prefers regs that don't grow the prologue. RAX/R11 are
- * emit scratch (reserved); RBX/R12 are the driver scratch pool. */
-static const Reg x64_int_allocable[] = {X64_R13, X64_R14, X64_R15, X64_R10};
+/* Allocable int pool, opt's spill/reload set: callee-saves first so the direct
+ * path's local cache prefers regs that don't grow the prologue. R10/R11 are
+ * emit scratch (reserved); RBX/R12 are the driver scratch pool; RAX is reserved
+ * (return / div-mul, asm-pinnable). */
+static const Reg x64_int_allocable[] = {X64_R13, X64_R14, X64_R15};
 static const Reg x64_int_scratch[] = {X64_RBX, X64_R12};
 
 static const NativePhysRegInfo x64_int_phys[] = {
-    X64_PHYS_INT_RESERVED(X64_RAX), /* return / emit scratch */
+    X64_PHYS_INT_RESERVED(X64_RAX), /* return / div-mul (asm-pinnable) */
     X64_PHYS_INT_ARG(X64_RCX),
     X64_PHYS_INT_RET_ARG(X64_RDX),
     X64_PHYS_INT_RESERVED(X64_RBX), /* driver scratch */
@@ -223,7 +227,7 @@ static const NativePhysRegInfo x64_int_phys[] = {
     X64_PHYS_INT_ARG(X64_RDI),
     X64_PHYS_INT_ARG(X64_R8),
     X64_PHYS_INT_ARG(X64_R9),
-    X64_PHYS_INT_CALLER(X64_R10),
+    X64_PHYS_INT_RESERVED(X64_R10), /* emit scratch */
     X64_PHYS_INT_RESERVED(X64_R11), /* emit scratch */
     X64_PHYS_INT_RESERVED(X64_R12), /* driver scratch */
     X64_PHYS_INT_CALLEE(X64_R13),
@@ -299,9 +303,10 @@ static const NativeAllocClassInfo x64_classes[] = {
      .arg_mask = (1u << X64_RDI) | (1u << X64_RSI) | (1u << X64_RDX) |
                  (1u << X64_RCX) | (1u << X64_R8) | (1u << X64_R9),
      .ret_mask = (1u << X64_RAX) | (1u << X64_RDX),
-     /* rax, rsp, rbp, r11 reserved (plus the rbx/r12 driver scratch pool) */
+     /* rax, rsp, rbp reserved; r10/r11 emit scratch; rbx/r12 driver scratch */
      .reserved_mask = (1u << X64_RAX) | (1u << X64_RSP) | (1u << X64_RBP) |
-                      (1u << X64_R11) | (1u << X64_RBX) | (1u << X64_R12)},
+                      (1u << X64_R10) | (1u << X64_R11) | (1u << X64_RBX) |
+                      (1u << X64_R12)},
     {.cls = NATIVE_REG_FP,
      .allocable = x64_fp_allocable,
      .nallocable = sizeof x64_fp_allocable / sizeof x64_fp_allocable[0],
@@ -319,9 +324,64 @@ static const NativeAllocClassInfo x64_classes[] = {
                       (1u << (X64_XMM0 + 14)) | (1u << X64_XMM15)},
 };
 
+/* Translate an x86-64 register spelling ("r10", "xmm3", ...) into its
+ * allocation class and Reg. Feeds the optimizer's inline-asm clobber masks
+ * and explicit hard-register operands ("{r10}" from a GNU local register
+ * variable). GPR names go through the hardware encoding (0..15); xmm names
+ * through the DWARF index table (17..32, rebased to 0..15). Returns non-zero
+ * for a non-register name (cc/memory/unknown), which the caller skips. */
+static int x64_resolve_name(const NativeRegInfo* ri, Slice name, Reg* out,
+                            NativeAllocClass* cls_out) {
+  char spelling[16]; /* NUL-terminated copy passed to the lookup helpers */
+  uint32_t n;
+  (void)ri;
+  if (!name.s || !name.len || name.len >= sizeof spelling) return 1;
+  memcpy(spelling, name.s, name.len);
+  spelling[name.len] = '\0';
+  if (x64_register_hw_index(spelling, &n) == 0 && n <= 15u) {
+    *cls_out = NATIVE_REG_INT;
+    *out = (Reg)n;
+  } else if (x64_register_index(spelling, &n) == 0 && n >= 17u && n <= 32u) {
+    *cls_out = NATIVE_REG_FP;
+    *out = (Reg)(n - 17u);
+  } else {
+    return 1;
+  }
+  return 0;
+}
+
+/* Report whether (cls, reg) may hold a pinned inline-asm operand on x64.
+ * Integer registers are checked against an explicit allow-list; FP registers
+ * are accepted up to X64_XMM0 + 11; any other class is rejected. Returns 1
+ * when the pin is allowed and 0 otherwise. */
+static int x64_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls,
+                                  Reg reg) {
+  /* RAX is reserved but not an emit temp, so it is a legal asm pin (the Linux
+   * syscall number/return register). R10/R11 are emit scratch and RBX/R12 the
+   * driver scratch pool, so those stay excluded. */
+  static const Reg pinnable_int[] = {
+      X64_RAX,
+      X64_RCX, X64_RDX, X64_RSI, X64_RDI,
+      X64_R8,  X64_R9,
+      X64_R13, X64_R14, X64_R15,
+  };
+  const unsigned count = sizeof pinnable_int / sizeof pinnable_int[0];
+  unsigned i;
+  (void)ri;
+  /* FP pins are bounded above only; integer pins must be on the list. */
+  if (cls == NATIVE_REG_FP) return reg <= X64_XMM0 + 11u;
+  if (cls != NATIVE_REG_INT) return 0;
+  for (i = 0; i < count; ++i) {
+    if (pinnable_int[i] == reg) return 1;
+  }
+  return 0;
+}
+
 static const NativeRegInfo x64_reg_info = {
     .classes = x64_classes,
     .nclasses = sizeof x64_classes / sizeof x64_classes[0],
+    .resolve_name = x64_resolve_name, /* register name -> (Reg, class) */
+    .asm_operand_reg_ok = x64_asm_operand_reg_ok, /* asm pin legality */
 };
 
 /* ============================ legality ============================ */
@@ -1904,7 +1964,8 @@ static const ABIArgInfo* x64_param_abi(NativeTarget* t, const ABIFuncInfo* abi,
   ((ABIArgPart*)scratch->parts)[0].cls =
       cg_type_is_float(t->c, desc->args[i].type) ? ABI_CLASS_FP : ABI_CLASS_INT;
   ((ABIArgPart*)scratch->parts)[0].loc = ABI_LOC_REG;
-  ((ABIArgPart*)scratch->parts)[0].size = native_type_size(t, desc->args[i].type);
+  ((ABIArgPart*)scratch->parts)[0].size =
+      native_type_size(t, desc->args[i].type);
   ((ABIArgPart*)scratch->parts)[0].align =
       native_type_align(t, desc->args[i].type);
   return scratch;
@@ -2187,7 +2248,7 @@ static void x64_bind_native_param(NativeTarget* t, const CGParamDesc* p,
         x64_defer_reg_bind(
             a,
             native_loc_reg(dst.type ? dst.type : p->type,
-                        (NativeAllocClass)dst.cls, (Reg)dst.v.reg),
+                           (NativeAllocClass)dst.cls, (Reg)dst.v.reg),
             isrc, part->size);
       } else {
         /* Frame dst: load to scratch then store (memory dst is never a cycle
@@ -2196,8 +2257,8 @@ static void x64_bind_native_param(NativeTarget* t, const CGParamDesc* p,
         NativeLoc tloc = native_loc_reg(p->type, cls, tmp);
         x64_load_part(t, tloc, isrc, 0, part->size);
         x64_store_part(
-            t, native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset), tloc,
-            0, part->size);
+            t, native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset),
+            tloc, 0, part->size);
       }
     }
     return;
@@ -2227,22 +2288,23 @@ static void x64_bind_native_param(NativeTarget* t, const CGParamDesc* p,
        * incoming arg registers, so a per-param move could clobber a register
        * another bind still needs. x64_bind_params_end resolves them together as
        * a parallel copy. */
-      x64_defer_reg_bind(a,
-                         native_loc_reg(dst.type ? dst.type : p->type,
-                                     (NativeAllocClass)dst.cls, (Reg)dst.v.reg),
-                         src, part->size);
+      x64_defer_reg_bind(
+          a,
+          native_loc_reg(dst.type ? dst.type : p->type,
+                         (NativeAllocClass)dst.cls, (Reg)dst.v.reg),
+          src, part->size);
     } else if (src.kind == NATIVE_LOC_REG) {
-      x64_store_part(t,
-                     native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset),
-                     src, 0, part->size);
+      x64_store_part(
+          t, native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset), src,
+          0, part->size);
     } else {
       /* Stack source -> frame dst: load to scratch, then store. */
       Reg tmp = cls == NATIVE_REG_FP ? X64_TMP_FP : X64_TMP_INT;
       NativeLoc tloc = native_loc_reg(p->type, cls, tmp);
       x64_load_part(t, tloc, src, 0, part->size);
-      x64_store_part(t,
-                     native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset),
-                     tloc, 0, part->size);
+      x64_store_part(
+          t, native_loc_stack(p->type, dst.v.frame, (i32)part->src_offset),
+          tloc, 0, part->size);
     }
   }
   a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u);
@@ -2352,7 +2414,8 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
   if (plan->callee.kind == NATIVE_LOC_REG &&
       (NativeAllocClass)plan->callee.cls == NATIVE_REG_INT &&
       plan->callee.v.reg != X64_R11) {
-    NativeLoc scratch = native_loc_reg(plan->callee.type, NATIVE_REG_INT, X64_R11);
+    NativeLoc scratch =
+        native_loc_reg(plan->callee.type, NATIVE_REG_INT, X64_R11);
     x64_move(t, scratch, plan->callee);
     plan->callee = scratch;
   }
@@ -2408,8 +2471,8 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
           X64ArgMove* m = &moves[nmoves++];
           u32 slot = next_fp;
           memset(m, 0, sizeof *m);
-          m->dst =
-              native_loc_reg(desc->args[i].type, cls, (Reg)(X64_XMM0 + next_fp++));
+          m->dst = native_loc_reg(desc->args[i].type, cls,
+                                  (Reg)(X64_XMM0 + next_fp++));
           m->src = desc->args[i];
           m->src_offset = part->src_offset;
           m->size = part->size;
@@ -2422,8 +2485,8 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
         } else if (cls == NATIVE_REG_INT && next_int < aregs->n_int_args) {
           X64ArgMove* m = &moves[nmoves++];
           memset(m, 0, sizeof *m);
-          m->dst =
-              native_loc_reg(desc->args[i].type, cls, aregs->int_args[next_int++]);
+          m->dst = native_loc_reg(desc->args[i].type, cls,
+                                  aregs->int_args[next_int++]);
           m->src = desc->args[i];
           m->src_offset = part->src_offset;
           m->size = part->size;
@@ -2451,7 +2514,8 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
        * the address of this call's result slot. */
       NativeLoc sret = native_loc_reg(i64t, NATIVE_REG_INT, aregs->int_args[0]);
       if (tail)
-        x64_load_part(t, sret, native_loc_stack(i64t, a->sret_ptr_slot, 0), 0, 8);
+        x64_load_part(t, sret, native_loc_stack(i64t, a->sret_ptr_slot, 0), 0,
+                      8);
       else
         x64_addr_of_loc(t, sret, desc->results[0]);
     }
@@ -2474,8 +2538,8 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
       rets[nr].src = native_loc_reg(pty, cls, rreg);
       rets[nr].dst = desc->results[0];
       if (rets[nr].dst.kind == NATIVE_LOC_FRAME)
-        rets[nr].dst =
-            native_loc_stack(pty, desc->results[0].v.frame, (i32)part->src_offset);
+        rets[nr].dst = native_loc_stack(pty, desc->results[0].v.frame,
+                                        (i32)part->src_offset);
       else if (rets[nr].dst.kind == NATIVE_LOC_STACK) {
         rets[nr].dst.v.stack.offset += (i32)part->src_offset;
         rets[nr].dst.type = pty;
@@ -2487,7 +2551,8 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
   } else if (abi && abi->ret.kind == ABI_ARG_IGNORE) {
     plan->nrets = 0;
   } else if (!abi && desc->nresults) {
-    rets[0].src = native_loc_reg(desc->results[0].type, NATIVE_REG_INT, X64_RAX);
+    rets[0].src =
+        native_loc_reg(desc->results[0].type, NATIVE_REG_INT, X64_RAX);
     rets[0].dst = desc->results[0];
     rets[0].mem = native_mem_for_type(t, desc->results[0].type, 0);
     plan->nrets = 1;
@@ -2604,7 +2669,8 @@ static void x64_plan_ret(NativeTarget* t, const CGFuncDesc* fd,
     access.align = native_type_align(t, values[0].type);
     x64_copy_bytes(t, dst_addr, src_addr, access);
     /* rax = sret pointer. Reload it (copy_bytes clobbered r11/rax). */
-    x64_load_part(t, native_loc_reg(i64t, NATIVE_REG_INT, X64_RAX), saved, 0, 8);
+    x64_load_part(t, native_loc_reg(i64t, NATIVE_REG_INT, X64_RAX), saved, 0,
+                  8);
     *out_rets = NULL;
     *out_nrets = 0;
     return;
@@ -3583,12 +3649,23 @@ static NativeAllocClass x64_asm_constraint_class(NativeDirectTarget* d,
   return NATIVE_REG_INT;
 }
 
+static int x64_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
+                                        const char* constraint,
+                                        NativeAsmRegPin* pin) {
+  NativeAsmRegPinStatus st =
+      native_asm_resolve_pin(d->native, reg, constraint, pin);
+  if (st == NATIVE_ASM_REG_PIN_ABSENT) return 0; /* no pin on this operand */
+  if (st != NATIVE_ASM_REG_PIN_OK) /* any other status is an invalid pin */
+    x64_asm_panic(d, native_asm_pin_status_message(st));
+  return 1; /* *pin holds the validated (reg, cls) */
+}
+
 /* Pick a free register from caller-saved allocable pools for an asm operand the
  * direct path self-allocates. */
 static Reg x64_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls,
                              u32* used_int, u32* used_fp) {
-  static const Reg int_pool[] = {X64_RDI, X64_RSI, X64_RDX, X64_RCX,
-                                 X64_R8,  X64_R9,  X64_R10};
+  static const Reg int_pool[] = {X64_RDI, X64_RSI, X64_RDX,
+                                 X64_RCX, X64_R8,  X64_R9};
   static const Reg fp_pool[] = {
       X64_XMM0, X64_XMM1, X64_XMM2, X64_XMM3,     X64_XMM4,      X64_XMM5,
       X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10, X64_XMM0 + 11};
@@ -3731,9 +3808,9 @@ static void x64_asm_restore_one(X64NativeTarget* a,
   addr.base_kind = NATIVE_ADDR_BASE_FRAME;
   addr.base.frame = s->slot;
   addr.base_type = s->type;
-  x64_emit_mem(
-      a, 1, native_loc_reg(s->type, s->cls, s->reg), addr,
-      native_mem_for_type(&a->base, s->type, s->cls == NATIVE_REG_FP ? 16u : 8u));
+  x64_emit_mem(a, 1, native_loc_reg(s->type, s->cls, s->reg), addr,
+               native_mem_for_type(&a->base, s->type,
+                                   s->cls == NATIVE_REG_FP ? 16u : 8u));
 }
 
 /* SysV callee-saved: int rbx,r12-r15; no fp. Win64 adds rdi,rsi + xmm6-15. */
@@ -3810,12 +3887,13 @@ static Reg x64_asm_native_mem_base(X64NativeTarget* a, SrcLoc loc,
   Reg dst;
   if (addr.base_kind == NATIVE_ADDR_BASE_REG && addr.offset == 0 &&
       addr.index_kind == NATIVE_ADDR_INDEX_NONE) {
-    if ((addr.base.reg & 0xfu) != X64_RAX && (addr.base.reg & 0xfu) != X64_R11)
+    if ((addr.base.reg & 0xfu) != X64_TMP_INT &&
+        (addr.base.reg & 0xfu) != X64_TMP_INT2)
       return (Reg)(addr.base.reg & 0xfu);
   }
   if (*ntmp >= 2u)
     x64_asm_panic_at(a->base.c, loc, "too many memory asm operands");
-  dst = (*ntmp == 0u) ? (Reg)X64_RAX : (Reg)X64_R11;
+  dst = (*ntmp == 0u) ? (Reg)X64_TMP_INT : (Reg)X64_TMP_INT2;
   (*ntmp)++;
   x64_addr_to_base_reg(a, addr, dst);
   return dst;
@@ -3880,7 +3958,7 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl,
     if ((body[0] == 'r') && inloc.kind != NATIVE_LOC_REG) {
       Reg r;
       if (ntmp >= 2u) x64_asm_panic_at(c, loc, "too many memory asm operands");
-      r = (ntmp == 0u) ? (Reg)X64_RAX : (Reg)X64_R11;
+      r = (ntmp == 0u) ? (Reg)X64_TMP_INT : (Reg)X64_TMP_INT2;
       ntmp++;
       inloc = native_loc_reg(type, NATIVE_REG_INT, r);
       x64_emit_mem(a, 1, inloc, x64_asm_loc_to_addr(a, loc, in_locs[i]),
@@ -4133,7 +4211,7 @@ static void x64_va_arg_(NativeDirectTarget* d, Operand dst, Operand ap_addr,
   X64NativeTarget* a = x64_of(d->native);
   int is_fp = cg_type_is_float(d->base.c, type);
   NativeLoc res = native_loc_reg(type, is_fp ? NATIVE_REG_FP : NATIVE_REG_INT,
-                              is_fp ? X64_TMP_FP : (Reg)X64_RDX);
+                                 is_fp ? X64_TMP_FP : (Reg)X64_RDX);
   NativeAddr dst_addr;
   /* Base in R11: the core advances/loads through R11 plus one GPR scratch (the
    * integer result reg itself, or RAX for FP results), so R11 must not be RAX.
@@ -4180,7 +4258,8 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
   native_asm_abi_clobber_masks(d->native, clobber_abi_sets, &abi_int, &abi_fp);
   clob_int |= abi_int;
   clob_fp |= abi_fp;
-  /* Reserve emit scratch (rax,r11), driver scratch, sp/bp, and clobbers. */
+  /* Reserve emit scratch (r10,r11), driver scratch (rbx,r12), rax (reserved;
+   * only self-allocated here when explicitly pinned), sp/bp, and clobbers. */
   used_int = clob_int | (1u << X64_RAX) | (1u << X64_R11) | (1u << X64_RSP) |
              (1u << X64_RBP) | (1u << X64_RBX) | (1u << X64_R12) |
              (1u << X64_R10);
@@ -4190,18 +4269,23 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
   for (i = 0; i < nout; ++i) {
     const char* body = native_asm_constraint_body(outs[i].str);
     KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
-    if (body[0] == 'r' || body[0] == 'x') {
+    NativeAsmRegPin pin;
+    if (x64_asm_resolve_pin_or_panic(d, outs[i].reg, outs[i].str, &pin)) {
+      /* GNU local register variable: pin to the named hard register. */
+      if (pin.cls == NATIVE_REG_FP) {
+        used_fp |= 1u << pin.reg;
+        clob_fp |= 1u << pin.reg;
+      } else {
+        used_int |= 1u << pin.reg;
+        clob_int |= 1u << pin.reg;
+      }
+      x64_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg);
+    } else if (body[0] == 'r' || body[0] == 'x') {
       NativeAllocClass cls = x64_asm_constraint_class(d, body);
       Reg reg = x64_asm_alloc_reg(d, cls, &used_int, &used_fp);
       x64_asm_bound_reg(&bound_outs[i], type, cls, reg);
-      if (outs[i].dir == KIT_CG_ASM_INOUT)
-        x64_direct_load_operand_to_reg(d, out_ops[i],
-                                       native_loc_reg(type, cls, reg));
     } else if (body[0] == 'm') {
       Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
-      NativeLoc lloc =
-          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
-      x64_direct_load_address_to_reg(d, out_ops[i], lloc);
       x64_asm_bound_mem(&bound_outs[i], type, reg);
     } else {
       x64_asm_panic(d, "unsupported output constraint");
@@ -4220,29 +4304,29 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
       if (bound_outs[matched].kind != X64_INLINE_OPK_REG)
         x64_asm_panic(d, "matching constraint requires register output");
       bound_ins[i] = bound_outs[matched];
-      x64_direct_load_operand_to_reg(
-          d, in_ops[i],
-          native_loc_reg(bound_ins[i].type,
-                      bound_ins[i].pad[0] == X64_INLINE_OPCLS_FP
-                          ? NATIVE_REG_FP
-                          : NATIVE_REG_INT,
-                      (Reg)bound_ins[i].v.local));
       continue;
     }
-    if (body[0] == 'r' || body[0] == 'x') {
+    NativeAsmRegPin pin;
+    if (x64_asm_resolve_pin_or_panic(d, ins[i].reg, ins[i].str, &pin)) {
+      /* GNU local register variable: pin to the named hard register. */
+      if (pin.cls == NATIVE_REG_FP) {
+        used_fp |= 1u << pin.reg;
+        clob_fp |= 1u << pin.reg;
+      } else {
+        used_int |= 1u << pin.reg;
+        clob_int |= 1u << pin.reg;
+      }
+      x64_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg);
+    } else if (body[0] == 'r' || body[0] == 'x') {
       NativeAllocClass cls = x64_asm_constraint_class(d, body);
       Reg reg = x64_asm_alloc_reg(d, cls, &used_int, &used_fp);
       x64_asm_bound_reg(&bound_ins[i], type, cls, reg);
-      x64_direct_load_operand_to_reg(d, in_ops[i], native_loc_reg(type, cls, reg));
     } else if (body[0] == 'i') {
       if (in_ops[i].kind != OPK_IMM)
         x64_asm_panic(d, "immediate constraint requires immediate operand");
       bound_ins[i] = in_ops[i];
     } else if (body[0] == 'm') {
       Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
-      NativeLoc lloc =
-          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
-      x64_direct_load_address_to_reg(d, in_ops[i], lloc);
       x64_asm_bound_mem(&bound_ins[i], type, reg);
     } else {
       x64_asm_panic(d, "unsupported input constraint");
@@ -4250,6 +4334,39 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
   }
 
   saved = x64_asm_save_callee_clobbers(a, clob_int, clob_fp, &nsaved);
+  for (i = 0; i < nout; ++i) {
+    if (bound_outs[i].kind == X64_INLINE_OPK_REG) {
+      NativeAllocClass cls = bound_outs[i].pad[0] == X64_INLINE_OPCLS_FP
+                                 ? NATIVE_REG_FP
+                                 : NATIVE_REG_INT;
+      if (outs[i].dir == KIT_CG_ASM_INOUT) {
+        x64_direct_load_operand_to_reg(
+            d, out_ops[i],
+            native_loc_reg(bound_outs[i].type, cls,
+                           (Reg)bound_outs[i].v.local));
+      }
+    } else if (bound_outs[i].kind == OPK_INDIRECT) {
+      NativeLoc loc =
+          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
+                         (Reg)bound_outs[i].v.ind.base);
+      x64_direct_load_address_to_reg(d, out_ops[i], loc);
+    }
+  }
+  for (i = 0; i < nin; ++i) {
+    if (bound_ins[i].kind == X64_INLINE_OPK_REG) {
+      NativeAllocClass cls = bound_ins[i].pad[0] == X64_INLINE_OPCLS_FP
+                                 ? NATIVE_REG_FP
+                                 : NATIVE_REG_INT;
+      x64_direct_load_operand_to_reg(
+          d, in_ops[i],
+          native_loc_reg(bound_ins[i].type, cls, (Reg)bound_ins[i].v.local));
+    } else if (bound_ins[i].kind == OPK_INDIRECT) {
+      NativeLoc loc =
+          native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT,
+                         (Reg)bound_ins[i].v.ind.base);
+      x64_direct_load_address_to_reg(d, in_ops[i], loc);
+    }
+  }
   asmh = x64_asm_open(c);
   x64_inline_bind(asmh, outs, nout, bound_outs, ins, nin, bound_ins, clobbers,
                   nclob);
diff --git a/src/cg/asm.c b/src/cg/asm.c
@@ -35,7 +35,9 @@ int api_asm_is_early_clobber(const char* s) {
  * SIMD/FP) are the per-target FP/vector register classes. The temp local's type
  * selects the actual NativeAllocClass downstream, and the target's asm hook
  * rejects a letter that does not apply to it, so listing all three here is safe
- * across backends. */
+ * across backends. A hard-register pin (AsmConstraint.reg, from a GNU local
+ * register variable) rides alongside such a register operand and does not
+ * change this classification — the constraint letter stays "r". */
 int api_asm_is_reg_constraint(char c) {
   return c == 'r' || c == 'f' || c == 'x' || c == 'w';
 }
@@ -97,8 +99,15 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
       outs[i].str = api_sym_cstr(g, outputs[i].constraint);
       outs[i].name = (Sym)outputs[i].name;
       outs[i].type = resolve_type(g->c, outputs[i].type);
+      outs[i].reg = (Sym)outputs[i].reg;
       outs[i].dir = (u8)outputs[i].dir;
       if (!outs[i].type) outs[i].type = fallback_ty;
+      if (outs[i].reg &&
+          !api_asm_is_reg_constraint(api_asm_constraint_body(outs[i].str)[0])) {
+        compiler_panic(g->c, g->cur_loc,
+                       "KitCg: asm hard-register output requires a register "
+                       "constraint");
+      }
       if (outs[i].dir == KIT_CG_ASM_INOUT) {
         if (i >= 10) {
           compiler_panic(g->c, g->cur_loc,
@@ -130,8 +139,15 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
       ins[i].str = api_sym_cstr(g, inputs[i].constraint);
       ins[i].name = (Sym)inputs[i].name;
       ins[i].type = resolve_type(g->c, inputs[i].type);
+      ins[i].reg = (Sym)inputs[i].reg;
       ins[i].dir = (u8)inputs[i].dir;
       if (!ins[i].type) ins[i].type = fallback_ty;
+      if (ins[i].reg &&
+          !api_asm_is_reg_constraint(api_asm_constraint_body(ins[i].str)[0])) {
+        compiler_panic(g->c, g->cur_loc,
+                       "KitCg: asm hard-register input requires a register "
+                       "constraint");
+      }
     }
     inout_index = ninputs;
     for (u32 i = 0; i < noutputs; ++i) {
diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h
@@ -409,6 +409,10 @@ typedef struct AsmConstraint {
                        input rvalue). Drives type width for the binder.
                        NULL only for hand-built test constraints (binder
                        falls back to a 64-bit int default). */
+  Sym reg;          /* Explicit hard-register name ("r10"/"x8"/...) this operand
+                       must occupy — a GNU local register variable bound as an
+                       operand; 0 = unconstrained. Only the target's register
+                       file resolves the name to a physical register. */
   u8 dir;           /* KitCgAsmDir */
   u8 pad[3];
 } AsmConstraint;
diff --git a/src/cg/native_asm.c b/src/cg/native_asm.c
@@ -3,6 +3,7 @@
 #include "arch/mc.h"
 #include "asm/asm.h"
 #include "asm/asm_lex.h"
+#include "core/pool.h" /* pool_slice for native_asm_resolve_pin */
 
 void native_file_scope_asm(NativeTarget* t, const char* src, size_t len) {
   AsmLexer* lex = asm_lex_open_mem(t->c, "<file-scope-asm>", src, len);
@@ -48,3 +49,76 @@ void native_asm_abi_clobber_masks(NativeTarget* t, u32 abi_sets, u32* int_mask,
     *fp_mask |= classes[NATIVE_REG_FP].callee_saved_mask;
   }
 }
+
+int native_asm_constraint_reg_class(const char* constraint,
+                                    NativeAllocClass* cls_out) {
+  const char* body = native_asm_constraint_body(constraint);
+  if (!body || !body[0]) return 0; /* empty constraint: not a register */
+  if (body[0] == 'r') {
+    if (cls_out) *cls_out = NATIVE_REG_INT; /* cls_out is optional */
+    return 1;
+  }
+  if (body[0] == 'f' || body[0] == 'x' || body[0] == 'w') {
+    if (cls_out) *cls_out = NATIVE_REG_FP;
+    return 1;
+  }
+  return 0; /* any other letter is not a register constraint */
+}
+
+static int native_asm_default_operand_reg_ok(const NativeRegInfo* ri,
+                                             NativeAllocClass cls, Reg reg) {
+  if (!ri || cls >= ri->nclasses) return 0;
+  const NativeAllocClassInfo* ci = &ri->classes[cls];
+  for (u32 i = 0; i < ci->nphys; ++i) {
+    const NativePhysRegInfo* pi = &ci->phys[i];
+    if (pi->reg != reg) continue;
+    return (pi->flags & NATIVE_REG_RESERVED) == 0; /* reserved => refused */
+  }
+  return 0; /* reg is not listed in this class */
+}
+
+NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg,
+                                             const char* constraint,
+                                             NativeAsmRegPin* out) {
+  Reg r;
+  NativeAllocClass cls;
+  NativeAllocClass want;
+  if (!reg) return NATIVE_ASM_REG_PIN_ABSENT; /* Sym 0 means no pin */
+  if (!t || !t->regs || !t->regs->resolve_name)
+    return NATIVE_ASM_REG_PIN_UNKNOWN;
+  if (t->regs->resolve_name(t->regs, pool_slice(t->c->global, reg), &r, &cls) !=
+      0)
+    return NATIVE_ASM_REG_PIN_UNKNOWN;
+  if (t->regs->asm_operand_reg_ok) { /* target-specific pin policy */
+    if (!t->regs->asm_operand_reg_ok(t->regs, cls, r))
+      return NATIVE_ASM_REG_PIN_FORBIDDEN;
+  } else if (!native_asm_default_operand_reg_ok(t->regs, cls, r)) {
+    return NATIVE_ASM_REG_PIN_FORBIDDEN;
+  }
+  if (!native_asm_constraint_reg_class(constraint, &want))
+    return NATIVE_ASM_REG_PIN_BAD_CONSTRAINT;
+  if (want != cls) return NATIVE_ASM_REG_PIN_CLASS_MISMATCH;
+  if (out) { /* out is optional; written only on success */
+    out->reg = r;
+    out->cls = cls;
+  }
+  return NATIVE_ASM_REG_PIN_OK;
+}
+
+const char* native_asm_pin_status_message(NativeAsmRegPinStatus st) {
+  switch (st) {
+    case NATIVE_ASM_REG_PIN_ABSENT:
+      return "no hard register pin";
+    case NATIVE_ASM_REG_PIN_OK:
+      return "hard register pin resolved";
+    case NATIVE_ASM_REG_PIN_UNKNOWN:
+      return "unknown asm register variable name";
+    case NATIVE_ASM_REG_PIN_FORBIDDEN:
+      return "asm register variable names an unsupported register";
+    case NATIVE_ASM_REG_PIN_BAD_CONSTRAINT:
+      return "asm register variable requires a register constraint";
+    case NATIVE_ASM_REG_PIN_CLASS_MISMATCH:
+      return "asm register variable class does not match its constraint";
+  }
+  return "invalid asm register variable"; /* st is outside the enum */
+}
diff --git a/src/cg/native_asm.h b/src/cg/native_asm.h
@@ -46,4 +46,29 @@ int native_asm_match_index(const char* s);
 void native_asm_abi_clobber_masks(NativeTarget* t, u32 abi_sets, u32* int_mask,
                                   u32* fp_mask);
 
+typedef enum NativeAsmRegPinStatus {
+  NATIVE_ASM_REG_PIN_ABSENT = 0,          /* operand carries no pin */
+  NATIVE_ASM_REG_PIN_OK = 1,              /* pin resolved and validated */
+  NATIVE_ASM_REG_PIN_UNKNOWN = -1,        /* name did not resolve to a reg */
+  NATIVE_ASM_REG_PIN_FORBIDDEN = -2,      /* target refuses this register */
+  NATIVE_ASM_REG_PIN_BAD_CONSTRAINT = -3, /* constraint is not a reg class */
+  NATIVE_ASM_REG_PIN_CLASS_MISMATCH = -4, /* reg class != constraint class */
+} NativeAsmRegPinStatus;
+
+typedef struct NativeAsmRegPin {
+  Reg reg;              /* resolved physical register */
+  NativeAllocClass cls; /* class that register was resolved in */
+} NativeAsmRegPin;
+
+/* Resolve and validate an inline-asm operand's explicit hard-register pin
+ * (AsmConstraint.reg, from a GNU local register variable). Distinguishes no pin
+ * from invalid pins, and verifies that the operand uses a register constraint
+ * of the matching target class. Writes *out only when the result is OK. */
+NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg,
+                                             const char* constraint,
+                                             NativeAsmRegPin* out);
+const char* native_asm_pin_status_message(NativeAsmRegPinStatus st);
+int native_asm_constraint_reg_class(const char* constraint,
+                                    NativeAllocClass* cls_out);
+
 #endif
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -1042,7 +1042,12 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges,
 
     if (gi.tied_hard_reg >= 0) {
       Reg fixed = (Reg)gi.tied_hard_reg;
-      if (!hard_available(f, cls, fixed)) {
+      /* Machinize has already validated inline-asm hard-register pins against
+       * the target's operand-register policy. Some legal pins are ABI registers
+       * outside the standard allocable set (aa64 x0, rv64 a7), so the allocator
+       * accepts validated physical registers here and relies on the
+       * conflict/clobber checks below for placement correctness. */
+      if (!hard_available(f, cls, fixed) && !phys_info_for(f, cls, fixed)) {
         SrcLoc loc = {0, 0, 0};
         compiler_panic(
             f->c, loc,
diff --git a/src/opt/pass_machinize.c b/src/opt/pass_machinize.c
@@ -1,18 +1,12 @@
 #include <string.h>
 
+#include "cg/native_asm.h"
 #include "cg/type.h"
 #include "core/pool.h"
 #include "core/slice.h"
 #include "opt/opt_internal.h"
 
-static const char* asm_constraint_body(const char* s) {
-  if (!s) return "";
-  if (s[0] == '=' && s[1] == '&') return s + 2;
-  if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1;
-  return s;
-}
-
-static int native_resolve_reg(NativeTarget* target, Sym name, Reg* out,
+static int native_resolve_reg(NativeTarget* target, Slice name, Reg* out,
                               RegClass* cls_out) {
   NativeAllocClass cls;
   if (!target || !target->regs || !target->regs->resolve_name) return 1;
@@ -21,19 +15,6 @@ static int native_resolve_reg(NativeTarget* target, Sym name, Reg* out,
   return 0;
 }
 
-static int asm_resolve_fixed_constraint(Func* f, NativeTarget* target,
-                                        const char* constraint, Reg* reg_out,
-                                        RegClass* cls_out) {
-  const char* body = asm_constraint_body(constraint);
-  if (body[0] != '{') return 0;
-  const char* end = body + 1;
-  while (*end && *end != '}') ++end;
-  if (*end != '}' || end == body + 1) return 0;
-  Sym name = pool_intern_slice(
-      f->c->global, (Slice){.s = body + 1, .len = (size_t)(end - body - 1)});
-  return native_resolve_reg(target, name, reg_out, cls_out) == 0;
-}
-
 static void asm_prepare_constraints(Func* f, NativeTarget* target,
                                     IRAsmAux* aux) {
   if (!aux) return;
@@ -51,24 +32,33 @@ static void asm_prepare_constraints(Func* f, NativeTarget* target,
   for (u32 i = 0; i < aux->nclob; ++i) {
     Reg r;
     RegClass cls;
-    if (native_resolve_reg(target, aux->clobbers[i], &r, &cls) != 0) continue;
+    Slice nm = pool_slice(f->c->global, aux->clobbers[i]);
+    if (native_resolve_reg(target, nm, &r, &cls) != 0) continue;
     if ((u32)cls < OPT_REG_CLASSES && r < 32) aux->clobber_mask[cls] |= 1u << r;
   }
   for (u32 i = 0; i < aux->nout; ++i) {
-    Reg r;
-    RegClass cls;
-    if (asm_resolve_fixed_constraint(f, target, aux->outs[i].str, &r, &cls)) {
-      aux->out_fixed_regs[i] = (i32)r;
-      aux->out_fixed_cls[i] = (u8)cls;
+    NativeAsmRegPin pin;
+    NativeAsmRegPinStatus st = native_asm_resolve_pin(target, aux->outs[i].reg,
+                                                      aux->outs[i].str, &pin);
+    if (st == NATIVE_ASM_REG_PIN_ABSENT) continue;
+    if (st != NATIVE_ASM_REG_PIN_OK) {
+      compiler_panic(f->c, (SrcLoc){0, 0, 0}, "opt asm: %s",
+                     native_asm_pin_status_message(st));
     }
+    aux->out_fixed_regs[i] = (i32)pin.reg;
+    aux->out_fixed_cls[i] = (u8)pin.cls;
   }
   for (u32 i = 0; i < aux->nin; ++i) {
-    Reg r;
-    RegClass cls;
-    if (asm_resolve_fixed_constraint(f, target, aux->ins[i].str, &r, &cls)) {
-      aux->in_fixed_regs[i] = (i32)r;
-      aux->in_fixed_cls[i] = (u8)cls;
+    NativeAsmRegPin pin;
+    NativeAsmRegPinStatus st =
+        native_asm_resolve_pin(target, aux->ins[i].reg, aux->ins[i].str, &pin);
+    if (st == NATIVE_ASM_REG_PIN_ABSENT) continue;
+    if (st != NATIVE_ASM_REG_PIN_OK) {
+      compiler_panic(f->c, (SrcLoc){0, 0, 0}, "opt asm: %s",
+                     native_asm_pin_status_message(st));
     }
+    aux->in_fixed_regs[i] = (i32)pin.reg;
+    aux->in_fixed_cls[i] = (u8)pin.cls;
   }
 }
 
diff --git a/test/arch/x64_inline_test.c b/test/arch/x64_inline_test.c
@@ -21,6 +21,19 @@ static void x64_bad_operand(KitCompiler* c, KitCg* cg, KitCgTypeId i64_ty) {
   it_inline_asm(c, cg, "movq %9, %%rax", NULL, 0, NULL, 0, NULL, 0);
 }
 
+/* A GNU local register variable pinned to %rax — the Linux syscall idiom
+ * (syscall number in rax). rax is reserved (return / div-mul), but no longer
+ * an emit-internal scratch register, so an asm operand may pin it while the
+ * allocator still leaves it alone. Clobbers rcx/r11 as `syscall` does. */
+static void x64_rax_pin(KitCompiler* c, KitCg* cg, KitCgTypeId i64_ty) {
+  KitCgAsmOperand in;
+  const char* clob[] = {"rcx", "r11", "memory"};
+  in = it_asm_op(c, "r", "n", i64_ty, KIT_CG_ASM_IN);
+  in.reg = kit_sym_intern(c, kit_slice_cstr("rax")); /* pin operand to rax */
+  kit_cg_push_int(cg, 60, i64_ty); /* SYS_exit */
+  it_inline_asm(c, cg, "syscall", NULL, 0, &in, 1, clob, 3);
+}
+
 int main(void) {
   static const uint8_t nops[] = {0x90u, 0x90u};
   static const uint8_t movq_rcx_rax[] = {0x48u, 0x89u, 0xc8u};
@@ -51,6 +64,20 @@ int main(void) {
                             x64_bad_operand, "operand index"),
             "expected out-of-range x64 asm operand to panic");
 
+  {
+    static const uint8_t syscall_bytes[] = {0x0fu, 0x05u};
+    InlineText sc;
+    IT_EXPECT(&env,
+              it_emit_text(&env, KIT_ARCH_X86_64, "x64_rax_pin", x64_rax_pin,
+                           &sc),
+              "failed to emit rax-pinned syscall inline asm");
+    if (sc.data)
+      IT_EXPECT(
+          &env, it_contains(sc.data, sc.len, syscall_bytes, sizeof syscall_bytes),
+          "missing syscall encoding for rax-pinned operand");
+    it_text_close(&sc);
+  }
+
   if (env.fails) {
     fprintf(stderr, "%d failure(s)\n", env.fails);
     return 1;
diff --git a/test/parse/cases/asm_03_register_operand.c b/test/parse/cases/asm_03_register_operand.c
@@ -0,0 +1,37 @@
+/* GNU local register variables (`register T x __asm__("reg")`) used as inline-
+ * asm operands must occupy the named hard register. Each template below
+ * addresses its registers directly (not via %N operand substitution), so the
+ * result is correct only when a/b/r really land in the pinned registers — a
+ * regression guard for register-variable operand pinning through the native
+ * backends (-O0 direct and the optimizer's tied-hard-reg path). 40 + 2 == 42.
+ *
+ * Arch-guarded; the asm is target-specific. The wasm backend has no native
+ * hard-register file, so this case opts out of W via its .wasm.skip sidecar. */
+
+int test_main(void) {
+  long out = 0;
+#if defined(__aarch64__)
+  register long a __asm__("x12") = 40;
+  register long b __asm__("x13") = 2;
+  register long r __asm__("x14");
+  __asm__ volatile("add x14, x12, x13" : "=r"(r) : "r"(a), "r"(b));
+  out = r;
+#elif defined(__x86_64__)
+  register long a __asm__("rdi") = 40;
+  register long b __asm__("rsi") = 2;
+  register long r __asm__("rdx");
+  __asm__ volatile("movq %%rdi, %%rdx\n\taddq %%rsi, %%rdx"
+                   : "=r"(r)
+                   : "r"(a), "r"(b));
+  out = r;
+#elif defined(__riscv) && __riscv_xlen == 64
+  register long a __asm__("a3") = 40;
+  register long b __asm__("a4") = 2;
+  register long r __asm__("a5");
+  __asm__ volatile("add a5, a3, a4" : "=r"(r) : "r"(a), "r"(b));
+  out = r;
+#else
+  out = 42; /* no asm for this arch: return the expected value directly */
+#endif
+  return (int)out;
+}
diff --git a/test/parse/cases/asm_03_register_operand.expected b/test/parse/cases/asm_03_register_operand.expected
@@ -0,0 +1 @@
+42
diff --git a/test/parse/cases/asm_03_register_operand.wasm.skip b/test/parse/cases/asm_03_register_operand.wasm.skip
@@ -0,0 +1 @@
+native hard-register operands are not meaningful for the wasm backend
diff --git a/test/parse/cases/asm_04_register_callee_saved.c b/test/parse/cases/asm_04_register_callee_saved.c
@@ -0,0 +1,71 @@
+/* A hard-register inline-asm operand may name a callee-saved register. The
+ * callee must preserve the caller's register value even though it loads the asm
+ * operand through that hard register internally. */
+
+#if defined(__aarch64__)
+__asm__(
+    ".text\n"
+    ".globl write_saved_reg\n"
+    "write_saved_reg:\n"
+    "mov x19, x0\n" /* x19 = argument; x19 is not restored before ret */
+    "ret\n"
+    ".globl read_saved_reg\n"
+    "read_saved_reg:\n"
+    "mov x0, x19\n" /* return the current x19 */
+    "ret\n");
+extern void write_saved_reg(long);
+extern long read_saved_reg(void);
+#elif defined(__x86_64__)
+__asm__(
+    ".text\n"
+    ".globl write_saved_reg\n"
+    "write_saved_reg:\n"
+    "movq %rdi, %r13\n" /* r13 = argument; r13 is not restored before retq */
+    "retq\n"
+    ".globl read_saved_reg\n"
+    "read_saved_reg:\n"
+    "movq %r13, %rax\n" /* return the current r13 */
+    "retq\n");
+extern void write_saved_reg(long);
+extern long read_saved_reg(void);
+#elif defined(__riscv) && __riscv_xlen == 64
+__asm__(
+    ".text\n"
+    ".globl write_saved_reg\n"
+    "write_saved_reg:\n"
+    "mv s1, a0\n" /* s1 = argument; s1 is not restored before ret */
+    "ret\n"
+    ".globl read_saved_reg\n"
+    "read_saved_reg:\n"
+    "mv a0, s1\n" /* return the current s1 */
+    "ret\n");
+extern void write_saved_reg(long);
+extern long read_saved_reg(void);
+#else
+static long saved_fallback; /* plain variable standing in for the register */
+static void write_saved_reg(long v) { saved_fallback = v; }
+static long read_saved_reg(void) { return saved_fallback; }
+#endif
+
+__attribute__((noinline)) static void touch_callee_saved(void) {
+#if defined(__aarch64__)
+  register long v __asm__("x19") = 123; /* same register the helpers access */
+  __asm__ volatile("" : "+r"(v)); /* empty template; v is a read-write operand */
+#elif defined(__x86_64__)
+  register long v __asm__("r13") = 123; /* same register the helpers access */
+  __asm__ volatile("" : "+r"(v)); /* empty template; v is a read-write operand */
+#elif defined(__riscv) && __riscv_xlen == 64
+  register long v __asm__("s1") = 123; /* same register the helpers access */
+  __asm__ volatile("" : "+r"(v)); /* empty template; v is a read-write operand */
+#endif /* other targets: the body is empty */
+}
+
+int test_main(void) {
+  volatile long saved = read_saved_reg(); /* volatile: keep in memory, since */
+  volatile long after; /* the helpers overwrite a callee-saved register */
+  write_saved_reg(77);
+  touch_callee_saved(); /* must leave the 77 in place */
+  after = read_saved_reg();
+  write_saved_reg(saved); /* put back the value seen on entry */
+  return (int)after;
+}
diff --git a/test/parse/cases/asm_04_register_callee_saved.expected b/test/parse/cases/asm_04_register_callee_saved.expected
@@ -0,0 +1 @@
+77
diff --git a/test/parse/cases/asm_04_register_callee_saved.wasm.skip b/test/parse/cases/asm_04_register_callee_saved.wasm.skip
@@ -0,0 +1 @@
+native hard-register operands are not meaningful for the wasm backend
diff --git a/test/parse/cases/asm_05_register_label_scope.c b/test/parse/cases/asm_05_register_label_scope.c
@@ -0,0 +1,10 @@
+/* An asm label belongs to the declarator that parsed it. A label on a separate
+ * non-register local must not leak into a later register variable and become a
+ * hard-register pin. */
+
+int test_main(void) {
+  long other __asm__("notareg") = 0; /* labelled, but not a register variable */
+  register long v = 42; /* register variable with no asm label of its own */
+  __asm__ volatile("" : : "r"(v));
+  return (int)(v + other);
+}
diff --git a/test/parse/cases/asm_05_register_label_scope.expected b/test/parse/cases/asm_05_register_label_scope.expected
@@ -0,0 +1 @@
+42
diff --git a/test/parse/cases_err/asm_register_bad_constraint.c b/test/parse/cases_err/asm_register_bad_constraint.c
@@ -0,0 +1,13 @@
+int test_main(void) {
+#if defined(__aarch64__)
+  register long v __asm__("x19") = 1;
+#elif defined(__x86_64__)
+  register long v __asm__("r13") = 1;
+#elif defined(__riscv) && __riscv_xlen == 64
+  register long v __asm__("s1") = 1;
+#else
+  register long v __asm__("r13") = 1;
+#endif
+  __asm__ volatile("" : : "m"(v)); /* "m" input on a register-labelled variable */
+  return 0;
+}
diff --git a/test/parse/cases_err/asm_register_bad_constraint.errpat b/test/parse/cases_err/asm_register_bad_constraint.errpat
@@ -0,0 +1 @@
+asm hard-register input requires a register constraint
diff --git a/test/parse/cases_err/asm_register_bad_name.c b/test/parse/cases_err/asm_register_bad_name.c
@@ -0,0 +1,5 @@
+int test_main(void) {
+  register long v __asm__("notareg") = 1; /* label is not a register name */
+  __asm__ volatile("" : : "r"(v)); /* v used as a register input */
+  return 0;
+}
diff --git a/test/parse/cases_err/asm_register_bad_name.errpat b/test/parse/cases_err/asm_register_bad_name.errpat
@@ -0,0 +1 @@
+unknown asm register variable name
diff --git a/test/parse/cases_err/asm_register_class_mismatch.c b/test/parse/cases_err/asm_register_class_mismatch.c
@@ -0,0 +1,13 @@
+int test_main(void) {
+#if defined(__aarch64__)
+  register long v __asm__("v0") = 1;
+#elif defined(__x86_64__)
+  register long v __asm__("xmm0") = 1;
+#elif defined(__riscv) && __riscv_xlen == 64
+  register long v __asm__("fa0") = 1;
+#else
+  long v = 1; /* NOTE(review): unlabelled; presumably no diagnostic here */
+#endif
+  __asm__ volatile("" : : "r"(v)); /* "r" constraint on the labelled variable */
+  return 0;
+}
diff --git a/test/parse/cases_err/asm_register_class_mismatch.errpat b/test/parse/cases_err/asm_register_class_mismatch.errpat
@@ -0,0 +1 @@
+asm register variable class does not match its constraint
diff --git a/test/parse/cases_err/asm_register_forbidden.c b/test/parse/cases_err/asm_register_forbidden.c
@@ -0,0 +1,13 @@
+int test_main(void) {
+#if defined(__aarch64__)
+  register long v __asm__("x9") = 1;
+#elif defined(__x86_64__)
+  register long v __asm__("r10") = 1;
+#elif defined(__riscv) && __riscv_xlen == 64
+  register long v __asm__("zero") = 1;
+#else
+  long v = 1; /* NOTE(review): unlabelled; presumably no diagnostic here */
+#endif
+  __asm__ volatile("" : : "r"(v)); /* labelled v used as a register input */
+  return 0;
+}
diff --git a/test/parse/cases_err/asm_register_forbidden.errpat b/test/parse/cases_err/asm_register_forbidden.errpat
@@ -0,0 +1 @@
+asm register variable names an unsupported register

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

M	include/kit/cg.h	\|	6	++++++
M	lang/c/parse/cg_adapter.c	\|	2	++
M	lang/c/parse/cg_adapter.h	\|	1	+
M	lang/c/parse/parse.c	\|	14	+++++++++++---
M	lang/c/parse/parse_priv.h	\|	6	++++++
M	lang/c/parse/parse_stmt.c	\|	88	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M	lang/c/parse/parse_type.c	\|	31	+++++++++++++++++++++++++------
M	src/arch/aa64/native.c	\|	198	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M	src/arch/c_target/c_emit.c	\|	115	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M	src/arch/native_target.h	\|	13	++++++++++++-
M	src/arch/rv64/native.c	\|	176	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M	src/arch/wasm/emit.c	\|	8	++++++++
M	src/arch/x64/emit.c	\|	15	+--------------
M	src/arch/x64/emit.h	\|	3	---
M	src/arch/x64/native.c	\|	251	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M	src/cg/asm.c	\|	18	+++++++++++++++++-
M	src/cg/cgtarget.h	\|	4	++++
M	src/cg/native_asm.c	\|	74	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/cg/native_asm.h	\|	25	+++++++++++++++++++++++++
M	src/opt/pass_lower.c	\|	7	++++++-
M	src/opt/pass_machinize.c	\|	54	++++++++++++++++++++++--------------------------------
M	test/arch/x64_inline_test.c	\|	27	+++++++++++++++++++++++++++
A	test/parse/cases/asm_03_register_operand.c	\|	37	+++++++++++++++++++++++++++++++++++++
A	test/parse/cases/asm_03_register_operand.expected	\|	1	+
A	test/parse/cases/asm_03_register_operand.wasm.skip	\|	1	+
A	test/parse/cases/asm_04_register_callee_saved.c	\|	71	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	test/parse/cases/asm_04_register_callee_saved.expected	\|	1	+
A	test/parse/cases/asm_04_register_callee_saved.wasm.skip	\|	1	+
A	test/parse/cases/asm_05_register_label_scope.c	\|	10	++++++++++
A	test/parse/cases/asm_05_register_label_scope.expected	\|	1	+
A	test/parse/cases_err/asm_register_bad_constraint.c	\|	13	+++++++++++++
A	test/parse/cases_err/asm_register_bad_constraint.errpat	\|	1	+
A	test/parse/cases_err/asm_register_bad_name.c	\|	5	+++++
A	test/parse/cases_err/asm_register_bad_name.errpat	\|	1	+
A	test/parse/cases_err/asm_register_class_mismatch.c	\|	13	+++++++++++++
A	test/parse/cases_err/asm_register_class_mismatch.errpat	\|	1	+
A	test/parse/cases_err/asm_register_forbidden.c	\|	13	+++++++++++++
A	test/parse/cases_err/asm_register_forbidden.errpat	\|	1	+