kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

dbg.c (12921B)


      1 /* AArch64 lifter for the displaced-step shim.
      2  *
      3  * Lays out a fixed-up copy of one insn in the session scratch slot
      4  * (DBG_DISPLACED_SLOT_BYTES bytes), followed by a BRK sentinel the
      5  * session arms an internal bp on.
      6  *
      7  * Supported families:
      8  *   - any insn with no PC-relative operand (copied verbatim);
      9  *   - B / BL / B.cond              — re-encode the immediate (B.cond: trampoline if out of range);
     10  *   - CBZ / CBNZ / TBZ / TBNZ      — always emit a trampoline:
     11  *       slot[0]  cond-branch +2 words  (taken → slot+8)
     12  *       slot[4]  BRK                   (not-taken fallthrough)
     13  *       slot[8]  LDR x16, =target
     14  *       slot[12] BR  x16
     15  *       slot[16] literal pool (8 bytes, absolute target)
     16  *   - ADR / ADRP                   — replace with LDR Xd, =target:
     17  *       slot[0]  LDR Xd, =target
     18  *       slot[4]  BRK
     19  *       slot[8]  literal pool (8 bytes)
     20  *   - LDR (literal), integer/LDRSW — synthesize indirect load:
     21  *       slot[0]  LDR x16, =literal_addr
     22  *       slot[4]  LDR Xt/Wt/LDRSW Xt, [x16]
     23  *       slot[8]  BRK
     24  *       slot[12] literal pool (8 bytes, absolute literal addr)
     25  *   - BR / BLR / RET               — copied verbatim; the BRK after never
     26  *     fires because the indirect branch transfers control. The session's
     27  *     stale internal_bp is cleared by the next prepare; finalize gates on
     28  *     PC == return_pc so it stays a no-op when control left the slot. */
     29 
     30 #include <string.h>
     31 
     32 #include "arch/aa64/isa.h"
     33 #include "arch/arch.h"
     34 
/* Scratch register used by the synthesized LDR/BR sequences in a shim. */
#define SHIM_X16 16u /* IP0; safe to clobber inside a shim */
/* Size in bytes of the single instruction word this file decodes, patches
 * and lifts. */
#define AA64_DBG_INSN_LEN 4u
/* (word & AA64_DBG_BL_MASK) == AA64_DBG_BL_OP identifies BL: the mask keeps
 * the top six bits, which are 100101 for BL. */
#define AA64_DBG_BL_MASK 0xFC000000u
#define AA64_DBG_BL_OP 0x94000000u
     39 
     40 static uint32_t aa64_dbg_brk_word(void) { return aa64_brk(0); }
     41 
     42 static int fits_signed(int64_t v, int bits) {
     43   int64_t lim = (int64_t)1 << (bits - 1);
     44   return v >= -lim && v < lim;
     45 }
     46 
     47 /* LDR (literal) for integer Xt: opc=01, V=0, fixed bits 011_0_00.
     48  *   01 011 0 00 imm19 Rt   →  0x58000000 | (imm19<<5) | Rt
     49  * imm19 is the signed word offset from the LDR's own PC. */
     50 static uint32_t enc_ldr_lit_x(uint32_t Rt, int32_t imm19) {
     51   return 0x58000000u | (((uint32_t)imm19 & 0x7ffffu) << 5) | (Rt & 0x1fu);
     52 }
     53 /* LDR Xt, [Xn, #0] / LDR Wt, [Xn, #0] / LDRSW Xt, [Xn, #0]. */
     54 static uint32_t enc_ldr64_reg(uint32_t Rt, uint32_t Rn) {
     55   return aa64_ldr64_uimm12(Rt, Rn, 0);
     56 }
     57 static uint32_t enc_ldr32_reg(uint32_t Rt, uint32_t Rn) {
     58   return aa64_ldst_uimm_pack((AA64LdStUimm){.size = 2,
     59                                             .V = 0,
     60                                             .opc = AA64_LDST_OPC_LDR,
     61                                             .imm12 = 0,
     62                                             .Rn = Rn,
     63                                             .Rt = Rt});
     64 }
     65 static uint32_t enc_ldrsw_reg(uint32_t Rt, uint32_t Rn) {
     66   return aa64_ldst_uimm_pack((AA64LdStUimm){
     67       .size = 2, .V = 0, .opc = 2, .imm12 = 0, .Rn = Rn, .Rt = Rt});
     68 }
     69 
     70 static void put_u32(uint8_t* w, uint32_t off, uint32_t v) {
     71   memcpy(w + off, &v, sizeof(v));
     72 }
     73 static void put_u64(uint8_t* w, uint32_t off, uint64_t v) {
     74   memcpy(w + off, &v, sizeof(v));
     75 }
     76 
     77 /* Sign-extend a `bits`-wide field whose raw value is `v`. */
     78 static int64_t sign_extend(uint64_t v, int bits) {
     79   uint64_t m = 1ull << (bits - 1);
     80   return (int64_t)((v ^ m) - m);
     81 }
     82 
     83 static int aa64_dbg_build_shim_word(uint32_t orig_insn, uint64_t orig_pc,
     84                                     void* scratch_write,
     85                                     uint64_t scratch_runtime, u32* shim_len) {
     86   uint8_t* w = (uint8_t*)scratch_write;
     87   uint32_t brk = aa64_brk(0);
     88   int64_t pc_delta;
     89   if (!shim_len) return 1;
     90   *shim_len = 0;
     91   pc_delta = (int64_t)orig_pc - (int64_t)scratch_runtime;
     92 
     93   /* ---- B / BL (imm26) ------------------------------------------------ */
     94   if ((orig_insn & 0x7C000000u) == 0x14000000u) {
     95     AA64BrImm f = aa64_brimm_unpack(orig_insn);
     96     int64_t imm = sign_extend(f.imm26, 26);
     97     int64_t new_off = imm * 4 + pc_delta;
     98     if ((new_off & 3) || !fits_signed(new_off / 4, 26)) {
     99       /* Out of B/BL range from scratch: fall back to LDR x30/PC trick is
    100        * messy for BL (need to preserve LR). Decline. */
    101       return 1;
    102     }
    103     f.imm26 = (uint32_t)((new_off / 4) & 0x3ffffffu);
    104     put_u32(w, 0, aa64_brimm_pack(f));
    105     put_u32(w, 4, brk);
    106     *shim_len = 4;
    107     return 0;
    108   }
    109 
    110   /* ---- B.cond (imm19) ------------------------------------------------ */
    111   if ((orig_insn & 0xFF000010u) == 0x54000000u) {
    112     AA64BrCond f = aa64_brcond_unpack(orig_insn);
    113     int64_t imm = sign_extend(f.imm19, 19);
    114     int64_t new_off = imm * 4 + pc_delta;
    115     if ((new_off & 3) || !fits_signed(new_off / 4, 19)) {
    116       /* Synthesize: B.cond +8 (skip BRK) ; BRK ; LDR x16,=tgt ; BR x16 ;
    117        * literal. The "taken" path branches to slot+8, the "not-taken"
    118        * path falls through to BRK at slot+4. */
    119       uint64_t target = orig_pc + (uint64_t)(imm * 4);
    120       AA64BrCond nf;
    121       nf.cond = f.cond;
    122       nf.imm19 = 2u; /* +8 bytes from slot[0] → slot[8] */
    123       put_u32(w, 0, aa64_brcond_pack(nf));
    124       put_u32(w, 4, brk);
    125       put_u32(w, 8,
    126               enc_ldr_lit_x(SHIM_X16, 2)); /* LDR x16, [pc+8] = slot[16] */
    127       put_u32(w, 12, aa64_br(SHIM_X16));
    128       put_u64(w, 16, target);
    129       *shim_len = 4;
    130       return 0;
    131     }
    132     f.imm19 = (uint32_t)((new_off / 4) & 0x7ffffu);
    133     put_u32(w, 0, aa64_brcond_pack(f));
    134     put_u32(w, 4, brk);
    135     *shim_len = 4;
    136     return 0;
    137   }
    138 
    139   /* ---- CBZ / CBNZ (imm19) — always trampoline form ------------------- */
    140   if ((orig_insn & 0x7E000000u) == 0x34000000u) {
    141     AA64CB f = aa64_cb_unpack(orig_insn);
    142     int64_t imm = sign_extend(f.imm19, 19);
    143     uint64_t target = orig_pc + (uint64_t)(imm * 4);
    144     AA64CB nf = f;
    145     nf.imm19 = 2u; /* +8 → slot[8] */
    146     put_u32(w, 0, aa64_cb_pack(nf));
    147     put_u32(w, 4, brk);
    148     put_u32(w, 8, enc_ldr_lit_x(SHIM_X16, 2));
    149     put_u32(w, 12, aa64_br(SHIM_X16));
    150     put_u64(w, 16, target);
    151     *shim_len = 4;
    152     return 0;
    153   }
    154 
    155   /* ---- TBZ / TBNZ (imm14) — always trampoline ------------------------
    156    *   b5 011011 op b40[18:14] imm14[18:5]  -- wait, field layout:
    157    *   b5(31) 011011(30..25) op(24) b40(23..19) imm14(18..5) Rt(4..0). */
    158   if ((orig_insn & 0x7E000000u) == 0x36000000u) {
    159     uint32_t b5 = (orig_insn >> 31) & 1u;
    160     uint32_t op = (orig_insn >> 24) & 1u;
    161     uint32_t b40 = (orig_insn >> 19) & 0x1fu;
    162     uint32_t Rt = orig_insn & 0x1fu;
    163     uint32_t imm14_raw = (orig_insn >> 5) & 0x3fffu;
    164     int64_t imm = sign_extend(imm14_raw, 14);
    165     uint64_t target = orig_pc + (uint64_t)(imm * 4);
    166     uint32_t new_imm14 = 2u; /* +8 → slot[8] */
    167     uint32_t new_word = (b5 << 31) | 0x36000000u | (op << 24) | (b40 << 19) |
    168                         ((new_imm14 & 0x3fffu) << 5) | (Rt & 0x1fu);
    169     put_u32(w, 0, new_word);
    170     put_u32(w, 4, brk);
    171     put_u32(w, 8, enc_ldr_lit_x(SHIM_X16, 2));
    172     put_u32(w, 12, aa64_br(SHIM_X16));
    173     put_u64(w, 16, target);
    174     *shim_len = 4;
    175     return 0;
    176   }
    177 
    178   /* ---- ADR / ADRP ---------------------------------------------------- */
    179   if ((orig_insn & 0x1F000000u) == 0x10000000u) {
    180     AA64PCRelAdr f = aa64_pcrel_adr_unpack(orig_insn);
    181     uint64_t imm_raw = ((uint64_t)f.immhi << 2) | (uint64_t)f.immlo;
    182     int64_t imm21 = sign_extend(imm_raw, 21);
    183     uint64_t target;
    184     if (f.op == AA64_ADR_OP_ADRP) {
    185       target = (orig_pc & ~(uint64_t)0xFFF) + ((uint64_t)imm21 << 12);
    186     } else {
    187       target = orig_pc + (uint64_t)imm21;
    188     }
    189     /* LDR Xd, [pc + 8] — the literal sits at slot[8]. */
    190     put_u32(w, 0, enc_ldr_lit_x(f.Rd, 2));
    191     put_u32(w, 4, brk);
    192     put_u64(w, 8, target);
    193     *shim_len = 4;
    194     return 0;
    195   }
    196 
    197   /* ---- LDR (literal) — integer & LDRSW only -------------------------- */
    198   if ((orig_insn & 0x3B000000u) == 0x18000000u) {
    199     uint32_t opc = (orig_insn >> 30) & 3u;
    200     uint32_t V = (orig_insn >> 26) & 1u;
    201     uint32_t Rt = orig_insn & 0x1fu;
    202     uint32_t imm19_raw = (orig_insn >> 5) & 0x7ffffu;
    203     int64_t imm19 = sign_extend(imm19_raw, 19);
    204     uint64_t literal_addr = orig_pc + (uint64_t)(imm19 * 4);
    205     uint32_t load_insn;
    206     if (V) return 1; /* vector forms (S/D/Q): not supported in v1 */
    207     switch (opc) {
    208       case 0:
    209         load_insn = enc_ldr32_reg(Rt, SHIM_X16);
    210         break; /* LDR Wt */
    211       case 1:
    212         load_insn = enc_ldr64_reg(Rt, SHIM_X16);
    213         break; /* LDR Xt */
    214       case 2:
    215         load_insn = enc_ldrsw_reg(Rt, SHIM_X16);
    216         break; /* LDRSW */
    217       default:
    218         return 1; /* PRFM (literal): not meaningful here */
    219     }
    220     /* LDR x16, [pc + 12] — literal at slot[12]. */
    221     put_u32(w, 0, enc_ldr_lit_x(SHIM_X16, 3));
    222     put_u32(w, 4, load_insn);
    223     put_u32(w, 8, brk);
    224     put_u64(w, 12, literal_addr);
    225     *shim_len = 8;
    226     return 0;
    227   }
    228 
    229   /* ---- BR / BLR / RET (indirect) ------------------------------------- */
    230   if ((orig_insn & 0xFE1FFC1Fu) == AA64_BR_REG_FAMILY_MATCH) {
    231     /* Copy verbatim; the BRK after will not fire because control
    232      * transfers to the register target. The session clears the stale
    233      * internal bp on the next prepare. */
    234     put_u32(w, 0, orig_insn);
    235     put_u32(w, 4, brk);
    236     *shim_len = 4;
    237     return 0;
    238   }
    239 
    240   /* ---- default: no PC-relative operand — copy verbatim --------------- */
    241   put_u32(w, 0, orig_insn);
    242   put_u32(w, 4, brk);
    243   *shim_len = 4;
    244   return 0;
    245 }
    246 
    247 static KitStatus aa64_dbg_breakpoint_patch(u8* out, u32 cap, u32* len_out) {
    248   uint32_t brk = aa64_dbg_brk_word();
    249   if (!out || !len_out) return KIT_INVALID;
    250   if (cap < AA64_DBG_INSN_LEN) return KIT_INVALID;
    251   memcpy(out, &brk, sizeof(brk));
    252   *len_out = AA64_DBG_INSN_LEN;
    253   return KIT_OK;
    254 }
    255 
    256 static u64 aa64_dbg_breakpoint_addr_from_fault_pc(u64 fault_pc) {
    257   return fault_pc;
    258 }
    259 
    260 static KitStatus aa64_dbg_decode_insn(const u8* bytes, u32 len, u64 pc,
    261                                       ArchDbgInsn* out) {
    262   if (!bytes || !out) return KIT_INVALID;
    263   if (len < AA64_DBG_INSN_LEN) return KIT_UNSUPPORTED;
    264   memset(out, 0, sizeof(*out));
    265   out->pc = pc;
    266   out->len = AA64_DBG_INSN_LEN;
    267   memcpy(out->bytes, bytes, AA64_DBG_INSN_LEN);
    268   return KIT_OK;
    269 }
    270 
    271 static KitStatus aa64_dbg_build_displaced_shim(
    272     const ArchDbgInsn* insn, void* scratch_write, u64 scratch_runtime,
    273     u32 scratch_cap, u32* sentinel_off, u64* fallthrough_pc) {
    274   uint32_t word = 0;
    275   if (!insn || !scratch_write || !sentinel_off || !fallthrough_pc)
    276     return KIT_INVALID;
    277   if (insn->len != AA64_DBG_INSN_LEN) return KIT_UNSUPPORTED;
    278   if (scratch_cap < 24u) return KIT_INVALID;
    279   memcpy(&word, insn->bytes, sizeof(word));
    280   if (aa64_dbg_build_shim_word(word, insn->pc, scratch_write, scratch_runtime,
    281                                sentinel_off) != 0) {
    282     return KIT_UNSUPPORTED;
    283   }
    284   *fallthrough_pc = insn->pc + AA64_DBG_INSN_LEN;
    285   return KIT_OK;
    286 }
    287 
    288 static int aa64_dbg_is_call(const ArchDbgInsn* insn) {
    289   uint32_t word = 0;
    290   if (!insn || insn->len != AA64_DBG_INSN_LEN) return 0;
    291   memcpy(&word, insn->bytes, sizeof(word));
    292   return (word & AA64_DBG_BL_MASK) == AA64_DBG_BL_OP;
    293 }
    294 
    295 static KitStatus aa64_dbg_direct_call_target(const ArchDbgInsn* insn,
    296                                              u64* target_out) {
    297   uint32_t word = 0;
    298   AA64BrImm f;
    299   int64_t imm;
    300   if (!insn || !target_out) return KIT_INVALID;
    301   if (insn->len != AA64_DBG_INSN_LEN) return KIT_UNSUPPORTED;
    302   memcpy(&word, insn->bytes, sizeof(word));
    303   if ((word & AA64_DBG_BL_MASK) != AA64_DBG_BL_OP) return KIT_NOT_FOUND;
    304   f = aa64_brimm_unpack(word);
    305   imm = sign_extend(f.imm26, 26);
    306   *target_out = insn->pc + (u64)(imm * 4);
    307   return KIT_OK;
    308 }
    309 
    310 static KitStatus aa64_dbg_direct_jump_target(const ArchDbgInsn* insn,
    311                                              u64* target_out) {
    312   uint32_t word = 0;
    313   AA64BrImm f;
    314   int64_t imm;
    315   if (!insn || !target_out) return KIT_INVALID;
    316   if (insn->len != AA64_DBG_INSN_LEN) return KIT_UNSUPPORTED;
    317   memcpy(&word, insn->bytes, sizeof(word));
    318   if ((word & AA64_BR_IMM_FAMILY_MASK) != AA64_BR_IMM_FAMILY_MATCH)
    319     return KIT_NOT_FOUND;
    320   f = aa64_brimm_unpack(word);
    321   if (f.op != 0) return KIT_NOT_FOUND;
    322   imm = sign_extend(f.imm26, 26);
    323   *target_out = insn->pc + (u64)(imm * 4);
    324   return KIT_OK;
    325 }
    326 
    327 static KitStatus aa64_dbg_link_register_return_address(
    328     const KitUnwindFrame* frame, u64* target_out) {
    329   if (!frame || !target_out) return KIT_INVALID;
    330   if (frame->regs[AA64_LR] == 0) return KIT_NOT_FOUND;
    331   *target_out = frame->regs[AA64_LR];
    332   return KIT_OK;
    333 }
    334 
/* AArch64 debugger ops table: the only non-static symbol in this file. It
 * exposes the hooks defined above to the rest of the debugger. Instructions
 * are fixed-width, so the minimum and maximum lengths are both
 * AA64_DBG_INSN_LEN. */
const ArchDbgOps aa64_dbg_ops = {
    .min_insn_len = AA64_DBG_INSN_LEN,
    .max_insn_len = AA64_DBG_INSN_LEN,
    .breakpoint_patch = aa64_dbg_breakpoint_patch,
    .breakpoint_addr_from_fault_pc = aa64_dbg_breakpoint_addr_from_fault_pc,
    .decode_insn = aa64_dbg_decode_insn,
    .build_displaced_shim = aa64_dbg_build_displaced_shim,
    .is_call = aa64_dbg_is_call,
    .direct_call_target = aa64_dbg_direct_call_target,
    .direct_jump_target = aa64_dbg_direct_jump_target,
    .link_register_return_address = aa64_dbg_link_register_return_address,
};