commit 52fb5121119dd0b223fa0ab022ba05ee2ee5d275 parent c651b11ca099c4b878a023fbd61bde9f8e108eee Author: Ryan Sepassi <rsepassi@gmail.com> Date: Tue, 5 May 2026 17:42:07 -0700 seed-kernel: tcc3 self-host of kernel.{S,c} now boots under qemu Assembler (arm64-asm.c / arm64-tok.h / arm64-link.c / tccasm.c) now covers what kernel.S needs: named MRS/MSR sysregs + MSR-imm (daifset/daifclr), eret, ic/tlbi SYS aliases, named dsb/dmb scopes, .quad sym-difference (R_AARCH64_PREL64), and forward b.cond/cbz/cbnz/ tbz via R_AARCH64_CONDBR19 / R_AARCH64_TSTBR14 reloc emission. Codegen fix in arm64-gen.c: load()/store() truncated SValue::c.i to uint32_t, silently dropping the upper 32 bits of any constant pointer address — fatal for the seed-kernel writing through the device alias VA 0x109000018 (PL011 UART). Drop the truncation; signed 9-bit ldur/stur offsets fit regardless. kernel.S enables CPACR_EL1.FPEN before the first `bl kmain` since tcc (unlike gcc -mgeneral-regs-only) saves callee-saved SIMD regs in every function prologue and would otherwise trap immediately. kernel.c stops defining mem* inline; tcc-cc/mem.c (memcpy/memset/ memmove/memcmp) is linked alongside in both the gcc Makefile and the tcc-build path so both compilers see the same implementation. Diffstat:
21 files changed, 798 insertions(+), 116 deletions(-)
diff --git a/docs/OS-TODO.md b/docs/OS-TODO.md @@ -31,70 +31,96 @@ features the doc previously listed as "tcc-side fixable": - The 16 `.macro VENTRY` invocations unrolled. - `.macro SAVE_TF / RESTORE_TF` unrolled at all three trap entries. -What remains, observed by running tcc3 against the post-refactor -sources (and confirmed empirically by stubbing each blocker and -re-running until the next one surfaces). - -### `kernel.S` blockers — assembler - -Failures from `tcc3 -c kernel.S` (line numbers as of this writing): - -1. **`.quad sym1 - sym2`** (line 13: `.quad _image_end - _head` in - the Image header). `tccasm.c`'s `.quad` handler on non-x86_64 - reads tokens through `strtoll` and rejects everything but a bare - integer. `asm_expr_sum` already supports same-section symbol - subtraction (lines 286–294 of tccasm.c) — extending `.quad` to go - through it is ~10 lines. -2. **`msr` / `mrs` named system registers** (line 23: `msr daifset, - #0xf`; line 26: `mrs x9, CurrentEL`; many more). Both mnemonics - absent from `arm64-tok.h` / `arm64-asm.c`. Needs: - - A small named-sysreg table covering the kernel's set: - `mair_el1`, `tcr_el1`, `ttbr0_el1`, `sctlr_el1`, `cpacr_el1`, - `sp_el0`, `sp_el1`, `esr_el1`, `far_el1`, `vbar_el1`, `elr_el1`, - `spsr_el1`, `hcr_el2`, `spsr_el2`, `elr_el2`, plus read-only - `CurrentEL`. - - The MSR-immediate form for ProcState fields: `msr daifset, - #imm` / `msr daifclr, #imm`. -3. **`eret`** (line 48). Single 32-bit fixed encoding (`0xd69f03e0`). -4. **`ic iallu`** (one site), **`tlbi vmalle1`** (one site). Both - are SYS-instruction encodings; one generic `emit_sys` helper plus - a small named-op table covers them and leaves room to grow. -5. **`dsb sy` / `dsb ish` / `dmb ish` / `dmb ishst` by name.** The - current `do_barrier` already accepts `#imm`; adding the named - tokens is one extra `tok_to_barrier_crm()` lookup. Workaround - for now: write `dsb #0xf` (sy), `dsb #0xb` (ish), `dmb #0xb` - (ish), `dmb #0xa` (ishst). 
No tcc work needed if we accept the - numeric form. -6. **`b.cond` to a forward in-section label** (line 29: `b.ne - in_el1`, plus several others to `1f` / `2f`). `sec_local_offset` - in `arm64-asm.c` rejects `sym->r != cur_text_section->sh_num`, - but on a forward reference the symbol is freshly created with - `sym->r == 0` (SHN_UNDEF) — so even legitimate same-section - forward jumps fall through to the "extern target needs CONDBR19 - reloc (unsupported)" error path. Two fixes: - - Add a forward-fixup list (record use site, patch when label is - defined). Same shape `i386-asm.c` uses. - - Implement `R_AARCH64_CONDBR19` in `arm64-link.c` and emit a - reloc when the symbol is undefined; the linker resolves it. - Both fix `cbz`/`cbnz` (#7) too. The reloc path is also needed if - a future kernel ever wants `b.cond <extern>`. -7. **`cbz` / `cbnz` to a forward in-section label** (line 440: - `cbnz x9, .Lpsci_smc`). Same root cause as #6 — same emitter - helper (`sec_local_offset`) — same fix unlocks both. - -The `.macro`/`.endm` and `adrp`/`:lo12:` items from the previous -draft of this list are gone: the first by unrolling, the second by -switching to `ldr Xn, =sym`. Both are pure-`kernel.S` choices that -made the assembler's job substantially smaller. - -### `kernel.S` blockers — runtime / codegen - -8. **`memmove` undefined at link.** `kernel.c` defines `memset` and - `memcpy` but not `memmove`. gcc never emits a `memmove` call - from this source, but tcc does (likely a struct-copy lowering; - the offending site has not been chased down). Add a small - `memmove` next to `memset`/`memcpy` in `kernel.c` — overlap - logic is ~6 lines. +### `kernel.S` blockers — assembler — DONE + +`tcc3 -c kernel.S` now succeeds. The seven items below all landed +together (assembler + linker patches in `scripts/simple-patches/ +tcc-0.9.26/`, dispatched out of `arm64-asm.c` / `arm64-link.c` / +`tccasm.c`): + +1. 
**`.quad sym1 - sym2`.** `.quad` is now routed through + `asm_data` (the same path `.long` uses) for both X86_64 and + ARM64. `arm64-asm.c` defines `gen_expr64` / `gen_expr32` that + emit `R_AARCH64_PREL64` / `R_AARCH64_PREL32` for symbol + differences (the asm-expr-sum pcrel branch leaves the addend + biased by +4 in the x86 PC32 convention; the emitters + compensate so it matches gas's reloc). +2. **Named MSR / MRS sysregs + MSR-immediate.** `arm64-tok.h` + declares `mrs`/`msr` plus `daifset`/`daifclr`. `arm64-asm.c` + carries a small `sysregs[]` table covering the kernel's set + (`mair_el1`, `tcr_el1`, `ttbr0_el1`, `sctlr_el1`, `cpacr_el1`, + `sp_el0`, `sp_el1`, `esr_el1`, `far_el1`, `vbar_el1`, + `elr_el1`, `spsr_el1`, `hcr_el2`, `spsr_el2`, `elr_el2`, + `currentel`); name lookup is case-insensitive so `CurrentEL` + matches. +3. **`eret`** maps to a fixed `0xd69f03e0` emit. +4. **`ic`/`tlbi` SYS-instruction aliases.** A generic + `emit_sys_alias` helper plus per-mnemonic name tables + (`ic_aliases[]`, `tlbi_aliases[]`) covers `ic iallu`/ + `ic ialluis`/`ic ivau` and `tlbi vmalle1`/`vmalle1is`/ + `alle1`/`alle1is`. +5. **Named `dsb`/`dmb` scope tokens.** `arm64-tok.h` declares + `sy`/`ish`/`ishst`/`ishld`/`nsh`/`nshst`/`nshld`/ + `osh`/`oshst`/`oshld`; `do_barrier` accepts either the named + form or the prior `#imm` numeric form. +6. **Forward `b.cond` to in-section labels.** `arm64-link.c` + gained `R_AARCH64_CONDBR19` (and `R_AARCH64_TSTBR14` for tbz); + the new `sec_branch_offset_or_reloc` falls back to a reloc + when the target sym is forward / extern, and the linker + resolves it. +7. **Forward `cbz`/`cbnz`** ride the same CONDBR19 reloc path. 
+ +Verification: `tcc -c seed-kernel/kernel.S` produces an .o that +links cleanly against a gcc-compiled `kernel.c` and `seed-kernel/ +kernel.lds`; the resulting `Image` differs from the gas baseline +only by `.balign` fill (zero vs `nop`) and a couple of byte-shift +choices that the linker resolves to identical run-time semantics +(vector-table padding is never executed; `ldr Xn, =sym` lowers to +a 4-instruction MOVW chain instead of a literal pool, giving a +slightly larger but identical-function `.head.text`). + +The `.macro`/`.endm` and `adrp`/`:lo12:` items from earlier +drafts are gone: the first by unrolling, the second by switching +to `ldr Xn, =sym`. Both are pure-`kernel.S` choices that made the +assembler's job substantially smaller. + +### `kernel.S` blockers — runtime / codegen — DONE + +8. **`mem*` symbols.** `kernel.c` no longer defines `memset` / + `memcpy` inline; they (plus `memmove`, which tcc emits for some + struct copies and gcc inlines) come from `tcc-cc/mem.c`, which + the seed-kernel `Makefile` and the tcc-build script both + compile + link alongside `kasm.o` / `kernel.o`. Same bytes + under both compilers. + +### `kernel.c` codegen blockers — DONE + +9. **`-mgeneral-regs-only` has no tcc equivalent.** tcc saves + the SIMD argument registers (`q0..q7`) in every C + function prologue; with default `CPACR_EL1.FPEN = 00` those + stp/ldp instructions trap to EL1 with EC=0x07. The fix is + in `kernel.S`: enable FP/SIMD (`CPACR_EL1.FPEN = 0b11`) + immediately after `vbar_el1` is set, before the first + `bl kmain`. gcc-built kernels are unaffected because gcc + honours `-mgeneral-regs-only` and never emits SIMD. + +10. **`(volatile T *)CONST` truncation.** Stock `arm64-gen.c` + in tcc 0.9.26 starts both `load()` and `store()` with + `uint64_t svcul = (uint32_t)sv->c.i;` followed by a + sign-extend, dropping the top 32 bits of any pointer-sized + constant address. 
Fine for struct-field offsets, fatal for + the seed-kernel writing through `(volatile u32 *) + 0x109000018` (the device alias for the PL011 UART under our + `setup_mmu` layout). Patches `arm64-svcul-no-truncate{,-store}` + in `scripts/simple-patches/tcc-0.9.26/` keep the full 64-bit + constant; signed 9-bit ldur/stur offsets fit regardless so + the prior sign-extend was a no-op for legitimate offsets. + +End-to-end: a kernel built entirely with the patched tcc3 +(`tcc3 -c kernel.{S,c}` + `tcc3 -c tcc-cc/mem.c` → `ld -T +kernel.lds`) boots under qemu-system-aarch64 and produces +identical console output to the gcc-built kernel through +`[seed] user exit_group(0)`. ### `kernel.lds` blockers — linker diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-asm-relocs.after b/scripts/simple-patches/tcc-0.9.26/arm64-link-asm-relocs.after @@ -0,0 +1,66 @@ +int code_reloc (int reloc_type) +{ + switch (reloc_type) { + case R_AARCH64_ABS32: + case R_AARCH64_ABS64: + case R_AARCH64_PREL32: + case R_AARCH64_PREL64: + case R_AARCH64_MOVW_UABS_G0_NC: + case R_AARCH64_MOVW_UABS_G1_NC: + case R_AARCH64_MOVW_UABS_G2_NC: + case R_AARCH64_MOVW_UABS_G3: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_GLOB_DAT: + case R_AARCH64_COPY: + return 0; + + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_TSTBR14: + case R_AARCH64_JUMP_SLOT: + return 1; + } + + tcc_error ("Unknown relocation type: %d", reloc_type); + return -1; +} + +/* Returns an enumerator to describe whether and when the relocation needs a + GOT and/or PLT entry to be created. See tcc.h for a description of the + different values. 
*/ +int gotplt_entry_type (int reloc_type) +{ + switch (reloc_type) { + case R_AARCH64_PREL32: + case R_AARCH64_PREL64: + case R_AARCH64_MOVW_UABS_G0_NC: + case R_AARCH64_MOVW_UABS_G1_NC: + case R_AARCH64_MOVW_UABS_G2_NC: + case R_AARCH64_MOVW_UABS_G3: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_GLOB_DAT: + case R_AARCH64_JUMP_SLOT: + case R_AARCH64_COPY: + return NO_GOTPLT_ENTRY; + + case R_AARCH64_ABS32: + case R_AARCH64_ABS64: + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_TSTBR14: + return AUTO_GOTPLT_ENTRY; + + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + return ALWAYS_GOTPLT_ENTRY; + } + + tcc_error ("Unknown relocation type: %d", reloc_type); + return -1; +} diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-asm-relocs.before b/scripts/simple-patches/tcc-0.9.26/arm64-link-asm-relocs.before @@ -0,0 +1,60 @@ +int code_reloc (int reloc_type) +{ + switch (reloc_type) { + case R_AARCH64_ABS32: + case R_AARCH64_ABS64: + case R_AARCH64_PREL32: + case R_AARCH64_MOVW_UABS_G0_NC: + case R_AARCH64_MOVW_UABS_G1_NC: + case R_AARCH64_MOVW_UABS_G2_NC: + case R_AARCH64_MOVW_UABS_G3: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_GLOB_DAT: + case R_AARCH64_COPY: + return 0; + + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + case R_AARCH64_JUMP_SLOT: + return 1; + } + + tcc_error ("Unknown relocation type: %d", reloc_type); + return -1; +} + +/* Returns an enumerator to describe whether and when the relocation needs a + GOT and/or PLT entry to be created. See tcc.h for a description of the + different values. 
*/ +int gotplt_entry_type (int reloc_type) +{ + switch (reloc_type) { + case R_AARCH64_PREL32: + case R_AARCH64_MOVW_UABS_G0_NC: + case R_AARCH64_MOVW_UABS_G1_NC: + case R_AARCH64_MOVW_UABS_G2_NC: + case R_AARCH64_MOVW_UABS_G3: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_GLOB_DAT: + case R_AARCH64_JUMP_SLOT: + case R_AARCH64_COPY: + return NO_GOTPLT_ENTRY; + + case R_AARCH64_ABS32: + case R_AARCH64_ABS64: + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + return AUTO_GOTPLT_ENTRY; + + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + return ALWAYS_GOTPLT_ENTRY; + } + + tcc_error ("Unknown relocation type: %d", reloc_type); + return -1; +} diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-prel64-condbr.after b/scripts/simple-patches/tcc-0.9.26/arm64-link-prel64-condbr.after @@ -0,0 +1,31 @@ + case R_AARCH64_ABS32: + write32le(ptr, val); + return; + case R_AARCH64_PREL32: + write32le(ptr, val - addr); + return; + case R_AARCH64_PREL64: + write64le(ptr, val - addr); + return; + case R_AARCH64_CONDBR19: { + /* B.cond imm19 (bits 23:5), encoding 4-byte aligned target. */ + int64_t off = (int64_t)(val - addr); + if (off & 3) + tcc_error("R_AARCH64_CONDBR19: target not 4-byte aligned"); + if (off < -(1 << 20) || off >= (1 << 20)) + tcc_error("R_AARCH64_CONDBR19: target out of range"); + write32le(ptr, (read32le(ptr) & 0xff00001fu) | + (((uint32_t)(off >> 2) & 0x7ffffu) << 5)); + return; + } + case R_AARCH64_TSTBR14: { + /* TBZ/TBNZ imm14 (bits 18:5). 
*/ + int64_t off = (int64_t)(val - addr); + if (off & 3) + tcc_error("R_AARCH64_TSTBR14: target not 4-byte aligned"); + if (off < -(1 << 15) || off >= (1 << 15)) + tcc_error("R_AARCH64_TSTBR14: target out of range"); + write32le(ptr, (read32le(ptr) & 0xfff8001fu) | + (((uint32_t)(off >> 2) & 0x3fffu) << 5)); + return; + } diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-prel64-condbr.before b/scripts/simple-patches/tcc-0.9.26/arm64-link-prel64-condbr.before @@ -0,0 +1,6 @@ + case R_AARCH64_ABS32: + write32le(ptr, val); + return; + case R_AARCH64_PREL32: + write32le(ptr, val - addr); + return; diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate-store.after b/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate-store.after @@ -0,0 +1,7 @@ +ST_FUNC void store(int r, SValue *sv) +{ + int svtt = sv->type.t; + int svr = sv->r & ~VT_LVAL_TYPE; + int svrv = svr & VT_VALMASK; + /* See arm64-gen.c::load() — same uint32_t truncation bug. */ + uint64_t svcul = (uint64_t)sv->c.i; diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate-store.before b/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate-store.before @@ -0,0 +1,7 @@ +ST_FUNC void store(int r, SValue *sv) +{ + int svtt = sv->type.t; + int svr = sv->r & ~VT_LVAL_TYPE; + int svrv = svr & VT_VALMASK; + uint64_t svcul = (uint32_t)sv->c.i; + svcul = svcul >> 31 & 1 ? svcul - ((uint64_t)1 << 32) : svcul; diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate.after b/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate.after @@ -0,0 +1,13 @@ +ST_FUNC void load(int r, SValue *sv) +{ + int svtt = sv->type.t; + int svr = sv->r & ~VT_LVAL_TYPE; + int svrv = svr & VT_VALMASK; + /* Stock tcc 0.9.26 truncates sv->c.i to uint32_t and then sign- + * extends, which destroys the upper 32 bits of any pointer-sized + * constant address. This is fine for struct-field offsets but + * silently miscompiles writes through `*(volatile T *)0x1xxxxxxxx`. 
+ * Keep the full 64-bit constant; signed offsets used by ldur/stur + * already fit in 9 bits so the prior sign-extend was a no-op for + * legitimate offsets. */ + uint64_t svcul = (uint64_t)sv->c.i; diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate.before b/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate.before @@ -0,0 +1,7 @@ +ST_FUNC void load(int r, SValue *sv) +{ + int svtt = sv->type.t; + int svr = sv->r & ~VT_LVAL_TYPE; + int svrv = svr & VT_VALMASK; + uint64_t svcul = (uint32_t)sv->c.i; + svcul = svcul >> 31 & 1 ? svcul - ((uint64_t)1 << 32) : svcul; diff --git a/scripts/simple-patches/tcc-0.9.26/files/arm64-asm.c b/scripts/simple-patches/tcc-0.9.26/files/arm64-asm.c @@ -79,7 +79,37 @@ ST_FUNC void gen_le32(int i) ST_FUNC void gen_expr32(ExprValue *pe) { - gen_le32(pe->v); + if (pe->pcrel) { + /* `.long sym - .` style — emit R_AARCH64_PREL32. asm_expr_sum + biased pe->v by +4 (x86 PC32 convention); subtract it back. */ + greloca(cur_text_section, pe->sym, ind, R_AARCH64_PREL32, pe->v - 4); + gen_le32(0); + } else if (pe->sym) { + greloca(cur_text_section, pe->sym, ind, R_AARCH64_ABS32, pe->v); + gen_le32(0); + } else { + gen_le32(pe->v); + } +} + +ST_FUNC void gen_expr64(ExprValue *pe) +{ + if (pe->pcrel) { + /* `.quad sym - .` / `.quad sym - sym2` (sym2 same-section): the + asm_expr_sum pcrel branch left pe->v biased by +4 (x86 PC32 + convention). Compensate so the addend matches gas's + R_AARCH64_PREL64 emission. 
*/ + greloca(cur_text_section, pe->sym, ind, R_AARCH64_PREL64, pe->v - 4); + gen_le32(0); + gen_le32(0); + } else if (pe->sym) { + greloca(cur_text_section, pe->sym, ind, R_AARCH64_ABS64, pe->v); + gen_le32(0); + gen_le32(0); + } else { + gen_le32((uint32_t)pe->v); + gen_le32((uint32_t)((uint64_t)pe->v >> 32)); + } } /* ---- operand model ------------------------------------------------ */ @@ -678,64 +708,89 @@ static void emit_branch_imm(AArch64Op *op, int is_call) } } -/* Compute a signed PC-relative offset from a same-section, non-extern - * symbol expression. Errors on extern reference. */ -static int64_t sec_local_offset(AArch64Op *op, const char *what) +/* Resolve a branch-target operand to either a same-section in-range + * immediate offset (returns 1, *poff is set) or a relocation against a + * not-yet-defined symbol (returns 0, *psym set, *poff = original addend). + * Forward references to a label not yet seen produce a fresh symbol + * with r=0 (SHN_UNDEF); the linker fixes them up via R_AARCH64_CONDBR19 + * / R_AARCH64_TSTBR14. */ +static int sec_branch_offset_or_reloc(AArch64Op *op, int64_t *poff, Sym **psym) { Sym *sym = op->e.sym; int64_t off; - if (sym) { - if (sym->r != cur_text_section->sh_num || (sym->type.t & VT_EXTERN)) - tcc_error("%s: extern target needs CONDBR19 reloc (unsupported)", what); + if (sym && sym->r == cur_text_section->sh_num + && !(sym->type.t & VT_EXTERN)) { off = (int64_t)sym->jnext + (int64_t)op->e.v - (int64_t)ind; - } else { - off = (int64_t)op->e.v; + if (off & 3) tcc_error("branch target not 4-byte aligned"); + *poff = off; + *psym = NULL; + return 1; } - if (off & 3) tcc_error("%s: target not 4-byte aligned", what); - return off; + if (sym) { + *psym = sym; + *poff = (int64_t)op->e.v; + return 0; + } + off = (int64_t)op->e.v; + if (off & 3) tcc_error("branch target not 4-byte aligned"); + *poff = off; + *psym = NULL; + return 1; } -/* B.cond (in-section only). 
*/ +/* B.cond (in-section + forward-ref / extern via R_AARCH64_CONDBR19). */ static void emit_branch_cond(int cond, AArch64Op *target) { - int64_t off = sec_local_offset(target, "b.cond"); - int64_t imm = off >> 2; - if (imm < -(1 << 18) || imm >= (1 << 18)) - tcc_error("b.cond: target out of 19-bit range"); - gen_le32(0x54000000u | (((uint32_t)imm & 0x7ffffu) << 5) | (uint32_t)cond); + int64_t off; Sym *sym; + uint32_t base = 0x54000000u | (uint32_t)cond; + if (sec_branch_offset_or_reloc(target, &off, &sym)) { + int64_t imm = off >> 2; + if (imm < -(1 << 18) || imm >= (1 << 18)) + tcc_error("b.cond: target out of 19-bit range"); + gen_le32(base | (((uint32_t)imm & 0x7ffffu) << 5)); + } else { + greloca(cur_text_section, sym, ind, R_AARCH64_CONDBR19, off); + gen_le32(base); + } } -/* CBZ/CBNZ: op=0/1. */ +/* CBZ/CBNZ — same imm19 layout as b.cond, so reuses CONDBR19. */ static void emit_branch_cmp(int rt, AArch64Op *target, int is_w, int op_cbnz) { - int64_t off = sec_local_offset(target, "cbz/cbnz"); - int64_t imm = off >> 2; - if (imm < -(1 << 18) || imm >= (1 << 18)) - tcc_error("cbz/cbnz: target out of 19-bit range"); - { - uint32_t op = 0x34000000u | sf_bit(is_w) | (((uint32_t)imm & 0x7ffffu) << 5) | rt; - if (op_cbnz) op |= (1u << 24); - gen_le32(op); + int64_t off; Sym *sym; + uint32_t base = 0x34000000u | sf_bit(is_w) | rt; + if (op_cbnz) base |= (1u << 24); + if (sec_branch_offset_or_reloc(target, &off, &sym)) { + int64_t imm = off >> 2; + if (imm < -(1 << 18) || imm >= (1 << 18)) + tcc_error("cbz/cbnz: target out of 19-bit range"); + gen_le32(base | (((uint32_t)imm & 0x7ffffu) << 5)); + } else { + greloca(cur_text_section, sym, ind, R_AARCH64_CONDBR19, off); + gen_le32(base); } } -/* TBZ/TBNZ: op=0/1. bit_pos in 0..63 (bit5 = b5, bits 4..0 = b40). */ +/* TBZ/TBNZ. bit_pos in 0..63 (bit5 = b5, bits 4..0 = b40). 
*/ static void emit_branch_test(int rt, int bit_pos, AArch64Op *target, int op_tbnz) { - int64_t off = sec_local_offset(target, "tbz/tbnz"); - int64_t imm = off >> 2; + int64_t off; Sym *sym; int b5; + uint32_t base; if (bit_pos < 0 || bit_pos > 63) tcc_error("tbz/tbnz: bit position out of range"); - if (imm < -(1 << 13) || imm >= (1 << 13)) - tcc_error("tbz/tbnz: target out of 14-bit range"); b5 = (bit_pos >> 5) & 1; - { - uint32_t op = 0x36000000u | ((uint32_t)b5 << 31) | - (((uint32_t)bit_pos & 0x1fu) << 19) | - (((uint32_t)imm & 0x3fffu) << 5) | rt; - if (op_tbnz) op |= (1u << 24); - gen_le32(op); + base = 0x36000000u | ((uint32_t)b5 << 31) | + (((uint32_t)bit_pos & 0x1fu) << 19) | rt; + if (op_tbnz) base |= (1u << 24); + if (sec_branch_offset_or_reloc(target, &off, &sym)) { + int64_t imm = off >> 2; + if (imm < -(1 << 13) || imm >= (1 << 13)) + tcc_error("tbz/tbnz: target out of 14-bit range"); + gen_le32(base | (((uint32_t)imm & 0x3fffu) << 5)); + } else { + greloca(cur_text_section, sym, ind, R_AARCH64_TSTBR14, off); + gen_le32(base); } } @@ -1331,15 +1386,37 @@ static void do_hint(int token, int hint_arg) gen_le32(base | ((uint32_t)crm << 8) | ((uint32_t)op2 << 5)); } -/* DSB/DMB/ISB. */ +/* Map a token to a DSB/DMB barrier-option CRm value, or -1 if not one. */ +static int tok_to_barrier_crm(int t) +{ + if (t == TOK_ASM_sy) return 0xf; + if (t == TOK_ASM_ish) return 0xb; + if (t == TOK_ASM_ishst) return 0xa; + if (t == TOK_ASM_ishld) return 0x9; + if (t == TOK_ASM_nsh) return 0x7; + if (t == TOK_ASM_nshst) return 0x6; + if (t == TOK_ASM_nshld) return 0x5; + if (t == TOK_ASM_osh) return 0x3; + if (t == TOK_ASM_oshst) return 0x2; + if (t == TOK_ASM_oshld) return 0x1; + return -1; +} + +/* DSB/DMB/ISB. Accepts either a named CRm (sy/ish/ishst/...) or `#imm`. 
*/ static void do_barrier(TCCState *s1, int token) { int crm = 0xf; /* default sy */ if (!at_end_of_insn()) { - AArch64Op a; - parse_operand(s1, &a); - if (a.kind == OP_IMM && !a.e.sym) crm = (int)a.e.v & 0xf; - else tcc_error("dsb/dmb/isb: expected #imm option"); + int named = tok_to_barrier_crm(tok); + if (named >= 0) { + crm = named; + next(); + } else { + AArch64Op a; + parse_operand(s1, &a); + if (a.kind == OP_IMM && !a.e.sym) crm = (int)a.e.v & 0xf; + else tcc_error("dsb/dmb/isb: expected scope name or #imm"); + } } { uint32_t base = 0xd503309fu; /* DSB sy */ @@ -1349,6 +1426,213 @@ static void do_barrier(TCCState *s1, int token) } } +/* ---- system-register access (MRS / MSR / MSR-imm) ---------------- */ + +/* Case-insensitive ASCII compare (sysreg names use both cases in the wild — + * `CurrentEL` is camelcase, `sctlr_el1` is lowercase). */ +static int ci_streq(const char *a, const char *b) +{ + while (*a && *b) { + int ca = (unsigned char)*a, cb = (unsigned char)*b; + if (ca >= 'A' && ca <= 'Z') ca += 32; + if (cb >= 'A' && cb <= 'Z') cb += 32; + if (ca != cb) return 0; + a++; b++; + } + return *a == 0 && *b == 0; +} + +/* Sysreg name → 15-bit encoding laid out as bits 19:5 of the MRS/MSR + * instruction (excluding the L bit at 21): + * bit 14 = o0 (1 for op0=3, 0 for op0=2) + * bits 13:11 = op1 + * bits 10:7 = CRn + * bits 6:3 = CRm + * bits 2:0 = op2 + */ +#define SR_ENC(o0, op1, CRn, CRm, op2) \ + (((o0) << 14) | ((op1) << 11) | ((CRn) << 7) | ((CRm) << 3) | (op2)) + +struct sysreg_entry { + const char *name; + uint32_t enc; +}; + +/* The set used by seed-kernel/kernel.S. Extend as needed. */ +static const struct sysreg_entry sysregs[] = { + /* EL1 read-only. */ + { "currentel", SR_ENC(1, 0, 4, 2, 2) }, + /* EL1. 
*/ + { "sctlr_el1", SR_ENC(1, 0, 1, 0, 0) }, + { "cpacr_el1", SR_ENC(1, 0, 1, 0, 2) }, + { "ttbr0_el1", SR_ENC(1, 0, 2, 0, 0) }, + { "tcr_el1", SR_ENC(1, 0, 2, 0, 2) }, + { "spsr_el1", SR_ENC(1, 0, 4, 0, 0) }, + { "elr_el1", SR_ENC(1, 0, 4, 0, 1) }, + { "sp_el0", SR_ENC(1, 0, 4, 1, 0) }, + { "esr_el1", SR_ENC(1, 0, 5, 2, 0) }, + { "far_el1", SR_ENC(1, 0, 6, 0, 0) }, + { "mair_el1", SR_ENC(1, 0, 10, 2, 0) }, + { "vbar_el1", SR_ENC(1, 0, 12, 0, 0) }, + /* EL2. */ + { "hcr_el2", SR_ENC(1, 4, 1, 1, 0) }, + { "spsr_el2", SR_ENC(1, 4, 4, 0, 0) }, + { "elr_el2", SR_ENC(1, 4, 4, 0, 1) }, + { "sp_el1", SR_ENC(1, 4, 4, 1, 0) }, +}; + +static int lookup_sysreg(const char *name, uint32_t *out) +{ + int i; + for (i = 0; i < (int)(sizeof(sysregs) / sizeof(sysregs[0])); i++) { + if (ci_streq(sysregs[i].name, name)) { + *out = sysregs[i].enc; + return 1; + } + } + return 0; +} + +/* Parse the textual sysreg token at `tok` (an identifier — daifset/daifclr + * tokens are handled separately by the MSR-imm path). Errors if not + * recognised. Consumes the token. */ +static uint32_t parse_sysreg_or_die(const char *what) +{ + uint32_t enc; + const char *name; + if (tok < TOK_IDENT) + tcc_error("%s: expected system register name", what); + name = get_tok_str(tok, NULL); + if (!lookup_sysreg(name, &enc)) + tcc_error("%s: unknown system register `%s`", what, name); + next(); + return enc; +} + +static void emit_msr_mrs(int is_read, int rt, uint32_t enc) +{ + uint32_t base = is_read ? 0xd5300000u : 0xd5100000u; + gen_le32(base | ((enc & 0x7fffu) << 5) | (uint32_t)(rt & 0x1f)); +} + +/* MSR <pstatefield>, #imm — currently DAIFSet / DAIFClr. */ +static void emit_msr_pstate(int is_clr, int imm) +{ + uint32_t base = is_clr ? 
0xd50340ffu : 0xd50340dfu; + if (imm < 0 || imm > 15) + tcc_error("msr daifset/daifclr: imm out of 0..15"); + gen_le32(base | (((uint32_t)imm & 0xf) << 8)); +} + +static void do_mrs(TCCState *s1) +{ + AArch64Op a; + uint32_t enc; + parse_operand(s1, &a); + asm_skip_comma(); + need_xreg(&a, "mrs"); + enc = parse_sysreg_or_die("mrs"); + emit_msr_mrs(1, a.reg, enc); +} + +static void do_msr(TCCState *s1) +{ + /* Two forms: + * msr <sysreg>, Xt + * msr DAIFSet|DAIFClr, #imm4 + * Disambiguated by the first operand: tok-based PSTATE keyword vs + * a generic identifier name lookup. */ + if (tok == TOK_ASM_daifset || tok == TOK_ASM_daifclr) { + int is_clr = (tok == TOK_ASM_daifclr); + AArch64Op imm; + next(); + asm_skip_comma(); + asm_skip_hash(); + parse_operand(s1, &imm); + if (imm.kind != OP_IMM || imm.e.sym) + tcc_error("msr daifset/daifclr: expected #imm"); + emit_msr_pstate(is_clr, (int)imm.e.v); + return; + } + { + AArch64Op a; + uint32_t enc = parse_sysreg_or_die("msr"); + asm_skip_comma(); + parse_operand(s1, &a); + need_xreg(&a, "msr"); + emit_msr_mrs(0, a.reg, enc); + } +} + +/* ---- SYS-instruction aliases (IC / TLBI) ------------------------- */ + +/* SYS-instruction encoding helper. Same form as MRS/MSR but op0 is + * implicitly 1 (bits 20:19 = 0b01, so only bit 19 is set by the base), and the L + * bit selects SYS (write, L=0) vs SYSL (read, L=1). We emit SYS only. 
+ */ +static void emit_sys_alias(int op1, int CRn, int CRm, int op2, int rt) +{ + uint32_t base = 0xd5080000u; /* SYS, L=0, o0=1 fixed */ + uint32_t enc = ((uint32_t)(op1 & 7) << 16) | + ((uint32_t)(CRn & 0xf) << 12) | + ((uint32_t)(CRm & 0xf) << 8) | + ((uint32_t)(op2 & 7) << 5); + gen_le32(base | enc | (uint32_t)(rt & 0x1f)); +} + +struct sys_alias { + const char *name; /* lowercased */ + uint8_t op1, CRn, CRm, op2; + uint8_t needs_xt; /* 1 if this op takes an Xt register operand */ +}; + +static const struct sys_alias ic_aliases[] = { + { "iallu", 0, 7, 5, 0, 0 }, + { "ialluis", 0, 7, 1, 0, 0 }, + { "ivau", 3, 7, 5, 1, 1 }, +}; + +static const struct sys_alias tlbi_aliases[] = { + { "vmalle1", 0, 8, 7, 0, 0 }, + { "vmalle1is", 0, 8, 3, 0, 0 }, + { "alle1", 4, 8, 7, 4, 0 }, + { "alle1is", 4, 8, 3, 4, 0 }, +}; + +static const struct sys_alias * +lookup_sys_alias(const struct sys_alias *table, int n, const char *name) +{ + int i; + for (i = 0; i < n; i++) + if (ci_streq(table[i].name, name)) return &table[i]; + return NULL; +} + +static void do_sys_alias_mnemonic(TCCState *s1, int is_tlbi) +{ + const struct sys_alias *tbl = is_tlbi ? tlbi_aliases : ic_aliases; + int n = is_tlbi ? (int)(sizeof(tlbi_aliases) / sizeof(tlbi_aliases[0])) + : (int)(sizeof(ic_aliases) / sizeof(ic_aliases[0])); + const char *name; + const struct sys_alias *e; + int rt = 31; + if (tok < TOK_IDENT) + tcc_error("%s: expected operation name", is_tlbi ? "tlbi" : "ic"); + name = get_tok_str(tok, NULL); + e = lookup_sys_alias(tbl, n, name); + if (!e) + tcc_error("%s: unknown operation `%s`", is_tlbi ? "tlbi" : "ic", name); + next(); + if (e->needs_xt) { + AArch64Op r; + asm_skip_comma(); + parse_operand(s1, &r); + need_xreg(&r, is_tlbi ? 
"tlbi" : "ic"); + rt = r.reg; + } + emit_sys_alias(e->op1, e->CRn, e->CRm, e->op2, rt); +} + /* ---- top-level dispatch ----------------------------------------- */ ST_FUNC void asm_opcode(TCCState *s1, int token) @@ -1567,6 +1851,22 @@ ST_FUNC void asm_opcode(TCCState *s1, int token) do_barrier(s1, token); return; + case TOK_ASM_mrs: + do_mrs(s1); + return; + case TOK_ASM_msr: + do_msr(s1); + return; + case TOK_ASM_eret: + gen_le32(0xd69f03e0u); + return; + case TOK_ASM_ic: + do_sys_alias_mnemonic(s1, 0); + return; + case TOK_ASM_tlbi: + do_sys_alias_mnemonic(s1, 1); + return; + /* ----- conditional-branch family (b.eq..b.nv + aliases) ----- */ default: if (token >= TOK_ASM_b_eq && token <= TOK_ASM_b_nv) { diff --git a/scripts/simple-patches/tcc-0.9.26/files/arm64-tok.h b/scripts/simple-patches/tcc-0.9.26/files/arm64-tok.h @@ -255,3 +255,29 @@ DEF_ASM(isb) DEF_ASM(dsb) DEF_ASM(dmb) + +/* MRS / MSR (system register access) and MSR-immediate (PSTATE field). */ + DEF_ASM(mrs) + DEF_ASM(msr) + DEF_ASM(eret) +/* PSTATE field names accepted as the first operand of MSR-immediate. + * Lowercased here; do_msr in arm64-asm.c matches them by token identity, + * not via the case-insensitive sysreg name lookup, so write them lowercase. */ + DEF_ASM(daifset) + DEF_ASM(daifclr) + +/* SYS-instruction aliases (cache / TLB maintenance). */ + DEF_ASM(ic) + DEF_ASM(tlbi) + +/* Barrier-scope option names for dsb/dmb. 
*/ + DEF_ASM(sy) + DEF_ASM(ish) + DEF_ASM(ishst) + DEF_ASM(ishld) + DEF_ASM(nsh) + DEF_ASM(nshst) + DEF_ASM(nshld) + DEF_ASM(osh) + DEF_ASM(oshst) + DEF_ASM(oshld) diff --git a/scripts/simple-patches/tcc-0.9.26/tcc-h-gen-expr64-arm64.after b/scripts/simple-patches/tcc-0.9.26/tcc-h-gen-expr64-arm64.after @@ -0,0 +1,4 @@ +ST_FUNC void gen_expr32(ExprValue *pe); +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) +ST_FUNC void gen_expr64(ExprValue *pe); +#endif diff --git a/scripts/simple-patches/tcc-0.9.26/tcc-h-gen-expr64-arm64.before b/scripts/simple-patches/tcc-0.9.26/tcc-h-gen-expr64-arm64.before @@ -0,0 +1,4 @@ +ST_FUNC void gen_expr32(ExprValue *pe); +#ifdef TCC_TARGET_X86_64 +ST_FUNC void gen_expr64(ExprValue *pe); +#endif diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad-asm-data.after b/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad-asm-data.after @@ -0,0 +1,6 @@ + if (size == 4) { + gen_expr32(&e); +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) + } else if (size == 8) { + gen_expr64(&e); +#endif diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad-asm-data.before b/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad-asm-data.before @@ -0,0 +1,6 @@ + if (size == 4) { + gen_expr32(&e); +#ifdef TCC_TARGET_X86_64 + } else if (size == 8) { + gen_expr64(&e); +#endif diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad.after b/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad.after @@ -0,0 +1,38 @@ + case TOK_ASMDIR_quad: +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) + size = 8; + goto asm_data; +#else + next(); + for(;;) { +#if HAVE_LONG_LONG + uint64_t vl; +#else + uint32_t vl; +#endif + const char *p; + + p = tokc.str.data; + if (tok != TOK_PPNUM) { + error_constant: + tcc_error("64 bit constant"); + } + vl = strtoll(p, (char **)&p, 0); + if (*p != '\0') + goto error_constant; + next(); + if (sec->sh_type != SHT_NOBITS) { + /* XXX: endianness */ + gen_le32(vl); +#if 
HAVE_LONG_LONG + gen_le32(vl >> 32); +#endif + } else { + ind += 8; + } + if (tok != ',') + break; + next(); + } + break; +#endif diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad.before b/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad.before @@ -0,0 +1,38 @@ + case TOK_ASMDIR_quad: +#ifdef TCC_TARGET_X86_64 + size = 8; + goto asm_data; +#else + next(); + for(;;) { +#if HAVE_LONG_LONG + uint64_t vl; +#else + uint32_t vl; +#endif + const char *p; + + p = tokc.str.data; + if (tok != TOK_PPNUM) { + error_constant: + tcc_error("64 bit constant"); + } + vl = strtoll(p, (char **)&p, 0); + if (*p != '\0') + goto error_constant; + next(); + if (sec->sh_type != SHT_NOBITS) { + /* XXX: endianness */ + gen_le32(vl); +#if HAVE_LONG_LONG + gen_le32(vl >> 32); +#endif + } else { + ind += 8; + } + if (tok != ',') + break; + next(); + } + break; +#endif diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh @@ -191,6 +191,14 @@ apply_our_patch arm64-va-arg-pointer "$SRC/arm64-gen.c" apply_our_patch arm64-store-const-lvalue "$SRC/arm64-gen.c" apply_our_patch arm64-load-const-lvalue "$SRC/arm64-gen.c" +# Stock arm64-gen.c truncates SValue::c.i to uint32_t at the top of +# both load() and store(). Fine for struct-field offsets, fatal for +# pointer-sized constant addresses (e.g., the seed-kernel writing to +# the device alias VA 0x109000000 for the PL011 UART). Drop the +# truncation; signed 9-bit ldur/stur offsets fit regardless. +apply_our_patch arm64-svcul-no-truncate "$SRC/arm64-gen.c" +apply_our_patch arm64-svcul-no-truncate-store "$SRC/arm64-gen.c" + # AArch64 assembler — phase 1. Drops in arm64-asm.c + arm64-tok.h and # wires their includes into tcc.h, libtcc.c, and tcctok.h. 
Patches are # gated by TCC_TARGET_ARM64 in the surrounding source so they no-op on @@ -201,6 +209,20 @@ apply_our_patch arm64-asm-include-tcc-h "$SRC/tcc.h" apply_our_patch arm64-asm-include-libtcc-c "$SRC/libtcc.c" apply_our_patch arm64-tok-include-tcctok-h "$SRC/tcctok.h" +# arm64-asm.c emits gen_expr64 for `.quad sym - sym2`; declare it for +# arm64 too (was x86_64-only). +apply_our_patch tcc-h-gen-expr64-arm64 "$SRC/tcc.h" + +# Route .quad through asm_data on arm64 so symbol-difference expressions +# emit a relocation (R_AARCH64_PREL64) instead of failing to parse. +apply_our_patch tccasm-arm64-quad "$SRC/tccasm.c" +apply_our_patch tccasm-arm64-quad-asm-data "$SRC/tccasm.c" + +# Enable the relocations the assembler now emits: PREL64 (data symbol +# difference), CONDBR19 + TSTBR14 (forward conditional branch / tbz). +apply_our_patch arm64-link-asm-relocs "$SRC/arm64-link.c" +apply_our_patch arm64-link-prel64-condbr "$SRC/arm64-link.c" + # tcc's lexer in ASM_FILE mode swallows mid-line '#' as a line comment, # which kills the ARM/AArch64 '#imm' immediate prefix. Restrict the # '#'-as-line-comment behavior to start-of-line so '#' tokenizes as diff --git a/seed-kernel/Makefile b/seed-kernel/Makefile @@ -6,7 +6,7 @@ CC := gcc LD := ld OUT := build -KOBJS := $(OUT)/kasm.o $(OUT)/kernel.o +KOBJS := $(OUT)/kasm.o $(OUT)/kernel.o $(OUT)/mem.o KIMAGE := $(OUT)/kernel.elf KBIN := $(OUT)/Image USER := $(OUT)/init @@ -38,6 +38,13 @@ $(OUT)/kasm.o: kernel.S | $(OUT) $(OUT)/kernel.o: kernel.c | $(OUT) $(CC) $(KCFLAGS) -c -o $@ $< +# Shared mem helpers (memcpy/memset/memmove/memcmp). Lives in +# tcc-cc/mem.c so the tcc-built and gcc-built kernels link the same +# implementation; tcc lowers some struct copies to memmove() that gcc +# inlines, so the kernel needs memcpy/memset/memmove regardless of the compiler.
+$(OUT)/mem.o: ../tcc-cc/mem.c | $(OUT) + $(CC) $(KCFLAGS) -c -o $@ $< + $(KIMAGE): $(KOBJS) kernel.lds $(LD) -nostdlib -static -T kernel.lds -o $@ $(KOBJS) diff --git a/seed-kernel/kernel.S b/seed-kernel/kernel.S @@ -57,6 +57,16 @@ in_el1: msr vbar_el1, x9 isb + /* Enable FP/SIMD at EL1+EL0 (CPACR_EL1.FPEN = 0b11) before any C + * call. The gcc build sets FPEN later from kmain() and gets away + * with it because `-mgeneral-regs-only` ensures gcc never emits + * FP/SIMD instructions; tcc has no equivalent flag and saves the + * callee-saved SIMD regs in every function prologue, so the very + * first `bl kmain` would take an FP-trapped exception. */ + mov x9, #(3 << 20) + msr cpacr_el1, x9 + isb + /* Zero BSS. */ ldr x1, =__bss_start ldr x2, =__bss_end diff --git a/seed-kernel/kernel.c b/seed-kernel/kernel.c @@ -76,15 +76,13 @@ __attribute__((noreturn)) static void hang(void) { for (;;) cpu_pause(PAUSE_WFE) /* ─── Tiny libc-ish helpers ─────────────────────────────────────────────── */ -/* libgcc / freestanding ABI helpers gcc may call implicitly. */ -void *memset(void *d, int c, u64 n) { - u8 *dd = d; for (u64 i = 0; i < n; i++) dd[i] = (u8)c; return d; -} -void *memcpy(void *d, const void *s, u64 n) { - u8 *dd = d; const u8 *ss = s; - for (u64 i = 0; i < n; i++) dd[i] = ss[i]; - return d; -} +/* memcpy / memset / memmove come from tcc-cc/mem.c, linked alongside. + * Both gcc and tcc emit calls to these for struct copies and bulk + * zero-init past their inline thresholds; centralising them in + * tcc-cc/mem.c keeps the tcc-built and gcc-built kernels in sync. */ +void *memcpy(void *dst, const void *src, u64 n); +void *memset(void *dst, int c, u64 n); +void *memmove(void *dst, const void *src, u64 n); static int str_eq(const char *a, const char *b) { while (*a && *a == *b) { a++; b++; }