kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit da4ace5a31d7aba4dc1236405908b350ca25897e
parent ed9a6cddccc301aecb643dfdfd2f96a782dd63d9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 18:22:53 -0700

Preserve ordered link inputs

Diffstat:
M driver/ld.c | 17 ++++++++++++++++-
M include/cfree/link.h | 15 +++++++++++++++
M src/api/link.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/link/link.c | 4 ++++
M src/link/link.h | 1 +
M src/link/link_internal.h | 2 ++
M src/link/link_macho.c | 381 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M src/link/link_resolve.c | 69 +++++++++++++++++++++++++++++++++++++++------------------------------
A test/link/cases/37_macho_ro_const_abs64/a.aa64.S | 17 +++++++++++++++++
A test/link/cases/37_macho_ro_const_abs64/expected | 1 +
A test/link/cases/37_macho_ro_const_abs64/j_targets | 1 +
A test/link/cases/37_macho_ro_const_abs64/targets | 1 +
12 files changed, 364 insertions(+), 200 deletions(-)

diff --git a/driver/ld.c b/driver/ld.c @@ -90,6 +90,8 @@ typedef struct LdOptions { * time → DT_NEEDED entries. */ LdDso* dsos; uint32_t ndsos; + CfreeLinkInputOrder* order; + uint32_t norder; const char** lib_dirs; /* -L */ uint32_t nlib_dirs; @@ -206,10 +208,11 @@ static int ld_alloc_arrays(LdOptions* o, int argc) { driver_alloc_zeroed(o->env, bound * sizeof(*o->object_files)); o->archives = driver_alloc_zeroed(o->env, bound * sizeof(*o->archives)); o->dsos = driver_alloc_zeroed(o->env, bound * sizeof(*o->dsos)); + o->order = driver_alloc_zeroed(o->env, bound * sizeof(*o->order)); o->lib_dirs = driver_alloc_zeroed(o->env, bound * sizeof(*o->lib_dirs)); o->rpaths = driver_alloc_zeroed(o->env, bound * sizeof(*o->rpaths)); o->rpath_links = driver_alloc_zeroed(o->env, bound * sizeof(*o->rpath_links)); - if (!o->object_files || !o->archives || !o->dsos || !o->lib_dirs || + if (!o->object_files || !o->archives || !o->dsos || !o->order || !o->lib_dirs || !o->rpaths || !o->rpath_links) { driver_errf(LD_TOOL, "out of memory"); return 1; @@ -220,6 +223,12 @@ static int ld_alloc_arrays(LdOptions* o, int argc) { /* ---------- positional archive bookkeeping ---------- */ +static void ld_push_order(LdOptions* o, uint8_t kind, uint32_t index) { + CfreeLinkInputOrder* slot = &o->order[o->norder++]; + slot->kind = kind; + slot->index = index; +} + static void ld_push_archive(LdOptions* o, const char* path, int owned, size_t owned_size) { LdArchive* a = &o->archives[o->narchives++]; @@ -229,6 +238,7 @@ static void ld_push_archive(LdOptions* o, const char* path, int owned, a->whole_archive = o->cur_whole_archive; a->link_mode = o->cur_link_mode; a->group_id = o->cur_group_id; + ld_push_order(o, CFREE_LINK_INPUT_ARCHIVE, o->narchives - 1u); } static void ld_push_dso(LdOptions* o, const char* path, int owned, @@ -237,6 +247,7 @@ static void ld_push_dso(LdOptions* o, const char* path, int owned, d->path = path; d->owned = owned; d->owned_size = owned_size; + ld_push_order(o, 
CFREE_LINK_INPUT_DSO, o->ndsos - 1u); } /* Filename ends in `.so` (with no further extension) or in `.so.N` @@ -580,6 +591,7 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { ld_push_dso(o, a, 0, 0); } else { o->object_files[o->nobject_files++] = a; + ld_push_order(o, CFREE_LINK_INPUT_OBJ_BYTES, o->nobject_files - 1u); } } @@ -622,6 +634,7 @@ static void ld_options_release(LdOptions* o) { driver_free(o->env, o->object_files, bound * sizeof(*o->object_files)); driver_free(o->env, o->archives, bound * sizeof(*o->archives)); driver_free(o->env, o->dsos, bound * sizeof(*o->dsos)); + driver_free(o->env, o->order, bound * sizeof(*o->order)); driver_free(o->env, o->lib_dirs, bound * sizeof(*o->lib_dirs)); driver_free(o->env, o->rpaths, bound * sizeof(*o->rpaths)); driver_free(o->env, o->rpath_links, bound * sizeof(*o->rpath_links)); @@ -801,6 +814,8 @@ static int ld_run_link(LdOptions* o) { inputs.narchives = o->narchives; inputs.dso_bytes = dso_in; inputs.ndso_bytes = o->ndsos; + inputs.order = o->order; + inputs.norder = o->norder; inputs.linker_script = script; inputs.entry = o->entry; inputs.build_id_mode = o->build_id_mode; diff --git a/include/cfree/link.h b/include/cfree/link.h @@ -131,6 +131,19 @@ typedef struct CfreeLinkArchiveInput { uint8_t pad; } CfreeLinkArchiveInput; +typedef enum CfreeLinkInputOrderKind { + CFREE_LINK_INPUT_OBJ, + CFREE_LINK_INPUT_OBJ_BYTES, + CFREE_LINK_INPUT_ARCHIVE, + CFREE_LINK_INPUT_DSO, +} CfreeLinkInputOrderKind; + +typedef struct CfreeLinkInputOrder { + uint8_t kind; /* CfreeLinkInputOrderKind */ + uint8_t pad[3]; + uint32_t index; /* index into the matching CfreeLinkInputs array */ +} CfreeLinkInputOrder; + typedef struct CfreeLinkInputs { CfreeObjBuilder *const *objs; uint32_t nobjs; @@ -145,6 +158,8 @@ typedef struct CfreeLinkInputs { uint8_t build_id_mode; /* CfreeBuildIdMode */ const uint8_t *build_id_bytes; uint32_t build_id_len; + const CfreeLinkInputOrder *order; /* optional; NULL preserves legacy order */ + 
uint32_t norder; } CfreeLinkInputs; typedef struct CfreeExeLinkOptions { diff --git a/src/api/link.c b/src/api/link.c @@ -23,8 +23,63 @@ CfreeJit* cfree_jit_from_image(LinkImage*); +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + static void load_inputs(Linker* l, const CfreeLinkInputs* in) { uint32_t i; + if (in->order && in->norder) { + for (i = 0; i < in->norder; ++i) { + const CfreeLinkInputOrder* ord = &in->order[i]; + switch ((CfreeLinkInputOrderKind)ord->kind) { + case CFREE_LINK_INPUT_OBJ: + if (ord->index >= in->nobjs) + compiler_panic(l->c, no_loc(), + "link: ordered obj input index out of range"); + if (in->objs[ord->index]) link_add_obj(l, in->objs[ord->index]); + break; + case CFREE_LINK_INPUT_OBJ_BYTES: { + const CfreeBytes* b; + if (ord->index >= in->nobj_bytes) + compiler_panic(l->c, no_loc(), + "link: ordered object input index out of range"); + b = &in->obj_bytes[ord->index]; + link_add_obj_bytes(l, b->name, b->data, b->len); + break; + } + case CFREE_LINK_INPUT_ARCHIVE: { + const CfreeLinkArchiveInput* a; + if (ord->index >= in->narchives) + compiler_panic(l->c, no_loc(), + "link: ordered archive input index out of range"); + a = &in->archives[ord->index]; + link_add_archive_bytes(l, a->bytes.name, a->bytes.data, + a->bytes.len, a->whole_archive, + a->link_mode, a->group_id); + break; + } + case CFREE_LINK_INPUT_DSO: { + const CfreeBytes* b; + if (ord->index >= in->ndso_bytes) + compiler_panic(l->c, no_loc(), + "link: ordered DSO input index out of range"); + b = &in->dso_bytes[ord->index]; + link_add_dso_bytes(l, b->name, b->data, b->len); + break; + } + default: + compiler_panic(l->c, no_loc(), "link: invalid ordered input kind"); + } + } + if (in->linker_script) link_set_script(l, in->linker_script); + if (in->entry) link_set_entry(l, in->entry); + (void)in->build_id_mode; + (void)in->build_id_bytes; + (void)in->build_id_len; + return; + } for (i = 0; i < in->nobjs; ++i) { if (in->objs[i]) link_add_obj(l, in->objs[i]); } 
diff --git a/src/link/link.c b/src/link/link.c @@ -113,6 +113,7 @@ LinkInputId link_add_obj(Linker* l, ObjBuilder* ob) { if (!l || !ob) return LINK_INPUT_NONE; in = inputs_push(l, &id); in->kind = LINK_INPUT_OBJ; + in->order = l->next_input_order++; in->obj = ob; return id; } @@ -150,6 +151,7 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data, reader_name, name ? name : "(unnamed)"); in = inputs_push(l, &id); in->kind = LINK_INPUT_OBJ_BYTES; + in->order = l->next_input_order++; in->obj = ob; /* re-uses the ObjBuilder slot for ownership */ in->name = name ? pool_intern_cstr(l->c->global, name) : 0; return id; @@ -197,6 +199,7 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data, reader_name, name ? name : "(unnamed)"); in = inputs_push(l, &id); in->kind = LINK_INPUT_DSO_BYTES; + in->order = l->next_input_order++; in->obj = ob; in->name = name ? pool_intern_cstr(l->c->global, name) : 0; /* DT_SONAME wins; fall back to the file's basename if the DSO has @@ -247,6 +250,7 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, if (!ar) compiler_panic(l->c, no_loc(), "link: out of memory growing archives"); ar->name = name ? pool_intern_cstr(l->c->global, name) : 0; + ar->order = l->next_input_order++; ar->whole_archive = whole_archive; ar->link_mode = link_mode; ar->group_id = group_id; diff --git a/src/link/link.h b/src/link/link.h @@ -37,6 +37,7 @@ typedef struct LinkInput { LinkInputId id; u8 kind; /* LinkInputKind */ u8 pad[3]; + u32 order; ObjBuilder* obj; /* for LINK_INPUT_OBJ, otherwise NULL until read */ Sym name; /* diagnostic name for bytes inputs */ /* DSO-only: SONAME extracted from PT_DYNAMIC.DT_SONAME. 0 if absent. 
diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -70,6 +70,7 @@ typedef struct LinkArchive { Sym name; LinkArchiveMember* members; u32 nmembers; + u32 order; u8 whole_archive; u8 link_mode; u8 group_id; @@ -84,6 +85,7 @@ struct Linker { Heap* heap; LinkInputs inputs; /* LinkInputId = slot index + 1 */ LinkArchives archives; + u32 next_input_order; Sym entry_name; /* Set by link_set_script. NULL: layout takes the existing default * bucket-based path. Non-NULL: layout_sections_scripted walks the diff --git a/src/link/link_macho.c b/src/link/link_macho.c @@ -155,10 +155,10 @@ static u32 mbuf_str(MByte* b, const char* s, u32 n) { typedef struct MachImp { LinkSymId sym; Sym name; - u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */ - u32 stub_idx; /* 1-based index into __stubs (0 if data import) */ - u32 got_idx; /* 1-based index into __got */ - u32 imports_strx; /* offset into chained-fixups symbol pool */ + u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */ + u32 stub_idx; /* 1-based index into __stubs (0 if data import) */ + u32 got_idx; /* 1-based index into __got */ + u32 imports_strx; /* offset into chained-fixups symbol pool */ u8 is_func; u8 weak; /* internal=1 means this entry is an in-image symbol that's referenced @@ -169,7 +169,8 @@ typedef struct MachImp { * resolving to NULL). No dylib_ord / stub_idx / chained-fixup bind. */ u8 internal; u8 pad[1]; - u64 internal_vaddr; /* image-relative target vaddr; meaningful only when internal=1 */ + u64 internal_vaddr; /* image-relative target vaddr; meaningful only when + internal=1 */ } MachImp; typedef struct MachDylib { @@ -183,11 +184,13 @@ typedef struct MachDylib { * ones). The descriptor itself is laid out in __DATA,__thread_vars by * either the input objects (internal) or the providing dylib (imported). 
*/ typedef struct MachTlv { - LinkSymId sym; /* canonical descriptor LinkSymId */ - u32 tlv_idx; /* 1-based slot index in __thread_ptrs */ - u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal (REBASE) */ + LinkSymId sym; /* canonical descriptor LinkSymId */ + u32 tlv_idx; /* 1-based slot index in __thread_ptrs */ + u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal + (REBASE) */ u8 pad[3]; - u32 import_idx; /* 1-based MachImp index when imported (for chained-bind ordinal) */ + u32 import_idx; /* 1-based MachImp index when imported (for chained-bind + ordinal) */ } MachTlv; /* ---- planned section ---- */ @@ -211,10 +214,10 @@ typedef struct MSec { u64 file_offset; u64 size; u32 align; - u32 flags; /* S_TYPE | S_ATTR_* */ + u32 flags; /* S_TYPE | S_ATTR_* */ u32 reserved1; u32 reserved2; - u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */ + u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */ u8 is_zerofill; u8 pad[6]; } MSec; @@ -232,10 +235,10 @@ typedef struct MSeg { u64 vmsize; u64 fileoff; u64 filesize; - u32 nsects; /* MSec count in segment — internal layout */ - u32 first_sec; /* first index into MSec[] */ - u32 nouts; /* OutSec count in segment — what hits the file */ - u32 first_out; /* first index into OutSec[] */ + u32 nsects; /* MSec count in segment — internal layout */ + u32 first_sec; /* first index into MSec[] */ + u32 nouts; /* OutSec count in segment — what hits the file */ + u32 first_out; /* first index into OutSec[] */ } MSeg; /* On-disk section view: one record per (segname, sectname) within a @@ -270,12 +273,13 @@ typedef struct MCtx { /* imports */ MachImp* imports; u32 nimports; - u32 nimports_real; /* count of imports with internal=0 (== prefix length; - * collect_imports appends internal=1 entries last) */ + u32 nimports_real; /* count of imports with internal=0 (== prefix length; + * collect_imports appends internal=1 entries last) */ u32 nimport_funcs; MachDylib* dylibs; u32 ndylibs; - /* 
sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space + 1. */ + /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space + * + 1. */ u32* sym_to_imp; u32 sym_to_imp_size; @@ -284,7 +288,7 @@ typedef struct MCtx { u32 nsecs; OutSec* outs; u32 nouts; - MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */ + MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */ u32 nsegs; /* Synthetic byte buffers, owned. */ @@ -324,16 +328,16 @@ typedef struct MCtx { u64 data_memsz; u64 linkedit_vaddr; u64 linkedit_fileoff; - u32 entry_offset; /* offset of entry within __TEXT segment */ + u32 entry_offset; /* offset of entry within __TEXT segment */ - u64 headers_size; /* header + loadcmds */ + u64 headers_size; /* header + loadcmds */ /* LINKEDIT contents */ MByte chained_fixups; MByte exports_trie; - MByte symtab; /* binary nlist_64 array */ + MByte symtab; /* binary nlist_64 array */ MByte strtab; - MByte indirect; /* u32 array */ + MByte indirect; /* u32 array */ MByte fn_starts; MByte data_in_code; MByte codesig; @@ -424,8 +428,10 @@ static void collect_imports(MCtx* x) { LinkInputId dso_id = s ? 
s->dso_input_id : LINK_INPUT_NONE; Sym install = 0; if (dso_id != LINK_INPUT_NONE && x->linker && - dso_id - 1u < LinkInputs_count(&x->linker->inputs)) - install = LinkInputs_at(&x->linker->inputs, dso_id - 1u)->soname; + dso_id - 1u < LinkInputs_count(&x->linker->inputs)) { + LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u); + if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname; + } if (install == 0) install = pool_intern_cstr(x->c->global, "/usr/lib/libSystem.B.dylib"); u32 ord = dylib_ordinal_of(x, install); @@ -609,6 +615,22 @@ static int sec_is_zerofill(const LinkSection* ls) { return ls->sem == SSEM_NOBITS; } +static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) { + for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (r->link_section_id == id && r->kind == R_ABS64) return 1; + } + return 0; +} + +static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) { + if (!ls || !ls->size || sec_is_exec(ls) || sec_is_writable(ls) || + sec_is_zerofill(ls)) { + return 0; + } + return section_has_abs64_reloc(img, ls->id); +} + /* Pick (segname, sectname) for a LinkSection. Comma-form Mach-O names * round-trip into MSec's inline 16-byte buffers; literal defaults point * at .rodata strings. 
Caller passes the MSec for per-section storage — @@ -660,12 +682,12 @@ static void plan_layout(MCtx* x) { x->segs[0].first_sec = 0; /* Segments 1..4 */ - seg_init(&x->segs[1], "__TEXT", - VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE); - seg_init(&x->segs[2], "__DATA_CONST", - VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE); - seg_init(&x->segs[3], "__DATA", - VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE); + seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE, + VM_PROT_READ | VM_PROT_EXECUTE); + seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE); + seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE); seg_init(&x->segs[4], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ); x->nsegs = 5; @@ -703,7 +725,8 @@ static void plan_layout(MCtx* x) { LinkSection* ls = &img->sections[i]; if (!ls->size) continue; if (sec_is_writable(ls)) continue; - if (sec_is_zerofill(ls)) continue; /* placed in __DATA */ + if (sec_is_zerofill(ls)) continue; /* placed in __DATA */ + if (sec_needs_data_const(img, ls)) continue; MSec* m = &x->secs[x->nsecs++]; memset(m, 0, sizeof(*m)); m->link_sec_id = ls->id; @@ -713,10 +736,9 @@ static void plan_layout(MCtx* x) { m->align = ls->align ? ls->align : 1u; m->size = ls->size; m->segidx = 1; - m->flags = sec_is_exec(ls) - ? (0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ | - 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/) - : 0u; + m->flags = sec_is_exec(ls) ? 
(0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ | + 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/) + : 0u; } /* __stubs synthetic */ @@ -736,7 +758,7 @@ static void plan_layout(MCtx* x) { m->size = x->stubs_size; m->segidx = 1; m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/; - m->reserved1 = 0; /* fill in later: indirect-symtab base */ + m->reserved1 = 0; /* fill in later: indirect-symtab base */ m->reserved2 = x->arch->macho_stub_size; } x->segs[1].nsects = x->nsecs - first_text_sec; @@ -747,8 +769,7 @@ static void plan_layout(MCtx* x) { if (x->nimports) { x->got_size = x->nimports * MZ_GOT_SIZE; x->got_bytes = (u8*)h->alloc(h, x->got_size, 8); - if (!x->got_bytes) - compiler_panic(x->c, no_loc(), "link_macho: oom on got"); + if (!x->got_bytes) compiler_panic(x->c, no_loc(), "link_macho: oom on got"); memset(x->got_bytes, 0, x->got_size); MSec* m = &x->secs[x->nsecs++]; memset(m, 0, sizeof(*m)); @@ -760,7 +781,20 @@ static void plan_layout(MCtx* x) { m->size = x->got_size; m->segidx = 2; m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/; - m->reserved1 = 0; /* indirect-symtab base */ + m->reserved1 = 0; /* indirect-symtab base */ + } + for (u32 i = 0; i < img->nsections; ++i) { + LinkSection* ls = &img->sections[i]; + if (!sec_needs_data_const(img, ls)) continue; + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->link_sec_id = ls->id; + pick_macho_names(ls, x->c, m); + m->segname = "__DATA_CONST"; + m->align = ls->align ? 
ls->align : 1u; + m->size = ls->size; + m->segidx = 2; + m->flags = 0; } x->segs[2].nsects = x->nsecs - first_dc; x->segs[2].first_sec = first_dc; @@ -885,7 +919,7 @@ static void plan_layout(MCtx* x) { if (!seen) ++cnt; } sg->nouts = cnt; - sg->first_out = 0; /* assigned in Phase B */ + sg->first_out = 0; /* assigned in Phase B */ } /* Compute load-command count + sizeofcmds, then back-fill section @@ -896,11 +930,11 @@ static void plan_layout(MCtx* x) { if (i == 0) { ++nseg_real; continue; - } /* PAGEZERO */ + } /* PAGEZERO */ if (i == 4) { ++nseg_real; continue; - } /* LINKEDIT always */ + } /* LINKEDIT always */ if (x->segs[i].nsects > 0) ++nseg_real; } /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64 @@ -908,7 +942,7 @@ static void plan_layout(MCtx* x) { u32 sizeofcmds = 0; for (u32 i = 0; i < x->nsegs; ++i) { if (i == 0 || i == 4) { - sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */ + sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */ continue; } if (x->segs[i].nsects == 0) continue; @@ -982,10 +1016,10 @@ static void plan_layout(MCtx* x) { cur_v += m->size; seen_zerofill = 1; } - sg->filesize = (i == 1) ? (cur_f - seg_start_f) - : (first_zerofill_v - ? (first_zerofill_v - seg_start_v) - : (cur_v - seg_start_v)); + sg->filesize = (i == 1) + ? (cur_f - seg_start_f) + : (first_zerofill_v ? (first_zerofill_v - seg_start_v) + : (cur_v - seg_start_v)); sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE); if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE; if (i == 1) { @@ -1025,7 +1059,14 @@ static void plan_layout(MCtx* x) { x->data_memsz = sg->vmsize; } vaddr = sg->vmaddr + sg->vmsize; - fileoff = sg->fileoff + sg->filesize; + /* Mach-O segments are mapped in page units. If a segment's memory + * image extends past its initialized file bytes (for example + * __DATA,__bss), the following segment's fileoff must not reuse those + * pages or the kernel can map later file contents into the zero-fill + * tail. 
*/ + fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE)) + ? sg->vmsize + : sg->filesize); (void)seen_zerofill; } /* LINKEDIT placeholder; size is filled after blob assembly. */ @@ -1076,8 +1117,7 @@ static void plan_layout(MCtx* x) { } u32 cap = x->nsecs + 1u; x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec)); - if (!x->outs) - compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec"); + if (!x->outs) compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec"); memset(x->outs, 0, sizeof(OutSec) * cap); x->nouts = 0; for (u32 i = 0; i < x->nsecs; ++i) { @@ -1197,12 +1237,12 @@ static void shift_sections(MCtx* x) { */ typedef struct FixSite { - u8 segidx; /* 2 = __DATA_CONST, 3 = __DATA */ - u8 is_bind; /* 0 = rebase, 1 = bind */ + u8 segidx; /* 2 = __DATA_CONST, 3 = __DATA */ + u8 is_bind; /* 0 = rebase, 1 = bind */ u8 pad[2]; - u32 import_idx; /* 1-based import index for binds, 0 for rebases */ - u64 vaddr; /* absolute VA of the slot */ - u64 rebase_target; /* unslid target VA; only used for rebases */ + u32 import_idx; /* 1-based import index for binds, 0 for rebases */ + u64 vaddr; /* absolute VA of the slot */ + u64 rebase_target; /* unslid target VA; only used for rebases */ } FixSite; typedef struct FixList { @@ -1445,7 +1485,7 @@ static void apply_relocs(MCtx* x, FixList* fl) { u64 tgt_v = s ? s->vaddr : 0; u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE; wr_u64_le(slot, tgt_v); - if (tgt_v == 0) continue; /* weak-undef → NULL */ + if (tgt_v == 0) continue; /* weak-undef → NULL */ FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v}; fix_push(fl, &fs); } else { @@ -1476,7 +1516,7 @@ static void apply_relocs(MCtx* x, FixList* fl) { LinkSymbol* s = sym_at(img, ts->sym); u64 tgt_v = s ? 
s->vaddr : 0; wr_u64_le(slot, tgt_v); - if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */ + if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */ FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v}; fix_push(fl, &fs); } @@ -1492,9 +1532,9 @@ static void apply_relocs(MCtx* x, FixList* fl) { */ typedef struct PageChain { - u32 first_offset_in_page; /* relative to page start */ + u32 first_offset_in_page; /* relative to page start */ u32 nsites; - u32 first_site_idx; /* into a per-segment site array */ + u32 first_site_idx; /* into a per-segment site array */ } PageChain; static int site_cmp_by_vaddr(const void* a, const void* b) { @@ -1526,16 +1566,15 @@ static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo, */ u64 v = 0; if (is_bind) { - u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */ + u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */ u64 addend = 0; u64 next = (u64)next4 & 0xfffull; v = ordinal | (addend << 24) | (0ull /* reserved */ << 32) | (next << 51) | ((u64)1 << 63); } else { /* rebase: target is full vmaddr; we get hi:lo split. 
*/ - u64 target = - ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo; - target &= ((u64)1 << 36) - 1u; /* 36 bits */ + u64 target = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo; + target &= ((u64)1 << 36) - 1u; /* 36 bits */ u64 high8 = 0; u64 next = (u64)next4 & 0xfffull; v = target | (high8 << 36) | (0ull /* reserved */ << 44) | (next << 51) | @@ -1558,14 +1597,14 @@ static void build_chained_fixups(MCtx* x, FixList* fl) { * uint32 imports_format (=1) * uint32 symbols_format (=0) */ - u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */ + u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */ (void)hdr_pos; u32 starts_offset_pos = mbuf_u32(out, 0); u32 imports_offset_pos = mbuf_u32(out, 0); u32 symbols_offset_pos = mbuf_u32(out, 0); mbuf_u32(out, x->nimports_real); mbuf_u32(out, DYLD_CHAINED_IMPORT); - mbuf_u32(out, 0); /* symbols uncompressed */ + mbuf_u32(out, 0); /* symbols uncompressed */ /* dyld expects 8-byte alignment of the starts table. */ mbuf_align(out, 4); @@ -1618,10 +1657,10 @@ static void build_chained_fixups(MCtx* x, FixList* fl) { * uint16 page_count * uint16 page_start[page_count] (0xFFFF = no fixups in page) */ - u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */ + u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */ mbuf_u16(out, (u16)MZ_PAGE); mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64); - mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */ + mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */ mbuf_u32(out, 0); mbuf_u16(out, (u16)page_count); u32 page_starts_pos = out->len; @@ -1639,8 +1678,7 @@ static void build_chained_fixups(MCtx* x, FixList* fl) { /* Sites are sorted globally; collect contiguous run for this seg. 
*/ u32 cur = first; while (cur < first + count) { - u32 page_idx = - (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE); + u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE); u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE); wr_u16_le(out->data + page_starts_pos + page_idx * 2u, (u16)offset_in_page); @@ -1659,27 +1697,25 @@ static void build_chained_fixups(MCtx* x, FixList* fl) { u64 dist = fl->a[k + 1].vaddr - s->vaddr; next4 = (u32)(dist / 4u); } - /* Find segment bytes; for slot in __DATA_CONST __got use - * x->got_bytes; for __DATA, special-case the synthetic - * __thread_ptrs region and otherwise walk LinkSections. */ + /* Find segment bytes. Synthetic pointer sections have private + * buffers; file-backed sections can live in any segment, including + * pointer-bearing read-only constants in __TEXT. */ u8* slot = NULL; - if (s->segidx == 2) { + if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr && + s->vaddr < x->got_vaddr + x->got_size) { /* __DATA_CONST: __got slot. */ slot = x->got_bytes + (s->vaddr - x->got_vaddr); - } else if (s->segidx == 3) { - if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr && - s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) { - slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr); - } else { - /* __DATA: walk MSecs to find the matching one. 
*/ - MSec* m = msec_for_vaddr(x, s->vaddr); - if (m && m->link_sec_id) { - u8* base = bytes_for_section(x, m, x->img); - if (base) { - LinkSection* ls = &x->img->sections[m->link_sec_id - 1u]; - u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr)); - slot = base + in_off; - } + } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr && + s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) { + slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr); + } else { + MSec* m = msec_for_vaddr(x, s->vaddr); + if (m && m->link_sec_id) { + u8* base = bytes_for_section(x, m, x->img); + if (base) { + LinkSection* ls = &x->img->sections[m->link_sec_id - 1u]; + u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr)); + slot = base + in_off; } } } @@ -1691,6 +1727,14 @@ static void build_chained_fixups(MCtx* x, FixList* fl) { if (s->is_bind) { /* ordinal is import index (1-based) - 1; chained-import format * uses 0-based. */ + if (s->import_idx == 0 || s->import_idx > x->nimports_real) { + compiler_panic( + x->c, no_loc(), + "link_macho: chained bind for vaddr 0x%llx uses import index " + "%u outside real import table size %u", + (unsigned long long)s->vaddr, (unsigned)s->import_idx, + (unsigned)x->nimports_real); + } u32 ord = s->import_idx - 1u; emit_pointer(slot, 1, ord, 0, next4); } else { @@ -1722,13 +1766,18 @@ static void build_chained_fixups(MCtx* x, FixList* fl) { MachImp* mi = &x->imports[i]; size_t nl; const char* nm = pool_str(x->c->global, mi->name, &nl); + if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) { + compiler_panic(x->c, no_loc(), + "link_macho: invalid chained import %u " + "(name=%u dylib_ord=%u ndylibs=%u)", + (unsigned)i, (unsigned)mi->name, + (unsigned)mi->dylib_ord, (unsigned)x->ndylibs); + } u32 off = out->len - symbols_off; - if (nm && nl) mbuf_str(out, nm, (u32)nl); - else mbuf_u8(out, 0); + mbuf_str(out, nm, (u32)nl); /* Patch the import slot. */ u32 packed = ((u32)mi->dylib_ord & 0xffu) | - ((u32)(mi->weak ? 
1u : 0u) << 8) | - ((off & 0x7fffffu) << 9); + ((u32)(mi->weak ? 1u : 0u) << 8) | ((off & 0x7fffffu) << 9); wr_u32_le(out->data + imports_off + i * 4u, packed); } (void)symbols_off; @@ -1767,8 +1816,8 @@ static void build_exports_trie(MCtx* x) { LinkSymbol* esym = sym_at(img, img->entry_sym); if (!esym || !esym->defined) { /* No entry — emit a single empty terminal trie. */ - mbuf_u8(out, 0); /* terminal_size 0 */ - mbuf_u8(out, 0); /* children 0 */ + mbuf_u8(out, 0); /* terminal_size 0 */ + mbuf_u8(out, 0); /* children 0 */ return; } size_t nl; @@ -1803,8 +1852,8 @@ static void build_exports_trie(MCtx* x) { /* root: terminal_size=0, children_count=1, "_main"\0, child_offset= * (leaf-position uleb). */ /* We'll back-patch child_offset after we know the leaf position. */ - mbuf_u8(out, 0); /* root terminal size */ - mbuf_u8(out, 1); /* children_count */ + mbuf_u8(out, 0); /* root terminal size */ + mbuf_u8(out, 1); /* children_count */ mbuf_str(out, nm, (u32)nl); /* child offset: 5 bytes max for uleb128(u32). Reserve and patch. */ u32 child_off_pos = out->len; @@ -1816,7 +1865,7 @@ static void build_exports_trie(MCtx* x) { mbuf_u8(out, (u8)leaf_payload_len); uleb128(out, flags); uleb128(out, entry_off); - mbuf_u8(out, 0); /* children_count */ + mbuf_u8(out, 0); /* children_count */ /* Patch child_offset uleb. */ u32 v = leaf_pos; @@ -1834,8 +1883,7 @@ static void build_exports_trie(MCtx* x) { * non-zero byte and also forcing remaining bytes to be 0x80 * extension or trim. Simpler: set last byte explicitly. 
*/ out->data[child_off_pos + i] = (u8)(out->data[child_off_pos + i] & 0x7fu); - for (u32 j = i + 1; j < 5; ++j) - out->data[child_off_pos + j] = 0x80; + for (u32 j = i + 1; j < 5; ++j) out->data[child_off_pos + j] = 0x80; out->data[child_off_pos + 4] = 0x00; break; } @@ -1849,7 +1897,7 @@ static void build_exports_trie(MCtx* x) { typedef struct NlistRec { u32 strx; u8 type; - u8 sect; /* 1-based section index (Mach-O) */ + u8 sect; /* 1-based section index (Mach-O) */ u16 desc; u64 value; } NlistRec; @@ -1885,7 +1933,7 @@ static void build_symtab(MCtx* x) { if (!s->defined) continue; if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; if (s->name == 0) continue; - if (s->kind == SK_ABS) continue; /* skip abs externs */ + if (s->kind == SK_ABS) continue; /* skip abs externs */ /* Locate which OutSec contains this vaddr to figure out n_sect. * n_sect is the 1-based index into the flat section_64 table the * file actually contains (post-coalesce), matching what we emit @@ -1935,9 +1983,9 @@ static void build_symtab(MCtx* x) { wr_u32_le(t + 0, strx); t[4] = N_UNDF | N_EXT; t[5] = 0; - /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.) */ - u16 desc = - (u16)(((u16)mi->dylib_ord & 0xff) << 8); + /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.) 
+ */ + u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8); if (mi->weak) desc |= N_WEAK_REF; wr_u16_le(t + 6, desc); wr_u64_le(t + 8, 0); @@ -2056,20 +2104,12 @@ static void layout_linkedit(MCtx* x) { * u32 magic (0xfade0c02) * u32 length (bytes including all hashes) * u32 version (>=0x20400 for execSeg fields) - * u32 flags (=0 ad-hoc — actually flags must include 0x2 (kSecCodeSignatureAdhoc)) - * u32 hashOffset (offset of first slot hash) - * u32 identOffset (offset of identifier string) - * u32 nSpecialSlots (=0) - * u32 nCodeSlots - * u32 codeLimit (file bytes covered) - * u8 hashSize (=32) - * u8 hashType (=2 sha256) - * u8 platform (=0) - * u8 pageSize (=12 for 4096) - * u32 spare2 (=0) - * u32 scatterOffset (=0) - * u32 teamOffset (=0) - * u32 spare3 (=0) + * u32 flags (=0 ad-hoc — actually flags must include 0x2 + * (kSecCodeSignatureAdhoc)) u32 hashOffset (offset of first slot hash) u32 + * identOffset (offset of identifier string) u32 nSpecialSlots (=0) u32 + * nCodeSlots u32 codeLimit (file bytes covered) u8 hashSize (=32) u8 + * hashType (=2 sha256) u8 platform (=0) u8 pageSize (=12 for 4096) u32 + * spare2 (=0) u32 scatterOffset (=0) u32 teamOffset (=0) u32 spare3 (=0) * u64 codeLimit64 (=0) * u64 execSegBase (=__TEXT.fileoff) * u64 execSegLimit (=__TEXT.filesize) @@ -2088,7 +2128,7 @@ static void wr_u64_be(u8* p, u64 v) { /* Build the codesig blob with placeholder hashes; size is precise so * file layout is final after this. 
*/ static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) { - u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */ + u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */ u32 nslots = (code_limit + code_page - 1u) / code_page; /* CodeDirectory size: @@ -2112,7 +2152,7 @@ static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) { /* SuperBlob header */ wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE); wr_u32_be(sb + 4, sb_size); - wr_u32_be(sb + 8, 1); /* count */ + wr_u32_be(sb + 8, 1); /* count */ /* slot 0: type=CSSLOT_CODEDIRECTORY, offset=20 */ wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY); wr_u32_be(sb + 16, 20u); @@ -2121,24 +2161,24 @@ static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) { u8* cd = sb + 20; wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY); wr_u32_be(cd + 4, cd_size); - wr_u32_be(cd + 8, 0x20400u); /* version with execSeg */ - wr_u32_be(cd + 12, 0x2u); /* flags = adhoc */ - wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */ - wr_u32_be(cd + 20, cd_hdr); /* identOffset */ - wr_u32_be(cd + 24, 0); /* nSpecialSlots */ + wr_u32_be(cd + 8, 0x20400u); /* version with execSeg */ + wr_u32_be(cd + 12, 0x2u); /* flags = adhoc */ + wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */ + wr_u32_be(cd + 20, cd_hdr); /* identOffset */ + wr_u32_be(cd + 24, 0); /* nSpecialSlots */ wr_u32_be(cd + 28, nslots); wr_u32_be(cd + 32, code_limit); cd[36] = (u8)CS_SHA256_LEN; cd[37] = (u8)CS_HASHTYPE_SHA256; - cd[38] = 0; /* platform */ + cd[38] = 0; /* platform */ cd[39] = (u8)CS_PAGE_SIZE_LOG2; - wr_u32_be(cd + 40, 0); /* spare2 */ - wr_u32_be(cd + 44, 0); /* scatterOffset */ - wr_u32_be(cd + 48, 0); /* teamOffset */ - wr_u32_be(cd + 52, 0); /* spare3 */ - wr_u64_be(cd + 56, 0); /* codeLimit64 */ - wr_u64_be(cd + 64, x->segs[1].fileoff); /* execSegBase */ - wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */ + wr_u32_be(cd + 40, 0); /* spare2 */ + wr_u32_be(cd + 44, 0); /* scatterOffset */ + 
wr_u32_be(cd + 48, 0); /* teamOffset */ + wr_u32_be(cd + 52, 0); /* spare3 */ + wr_u64_be(cd + 56, 0); /* codeLimit64 */ + wr_u64_be(cd + 64, x->segs[1].fileoff); /* execSegBase */ + wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */ wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY); /* identifier */ @@ -2173,8 +2213,7 @@ static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs, static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { MSeg* sg = &x->segs[segidx]; - u32 seg_cmd_size = - MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE; + u32 seg_cmd_size = MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE; u32 base = lc->len; mbuf_u32(lc, LC_SEGMENT_64); mbuf_u32(lc, seg_cmd_size); @@ -2192,7 +2231,7 @@ static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { mbuf_u32(lc, sg->maxprot); mbuf_u32(lc, sg->initprot); mbuf_u32(lc, sg->nouts); - mbuf_u32(lc, 0); /* flags */ + mbuf_u32(lc, 0); /* flags */ for (u32 j = 0; j < sg->nouts; ++j) { OutSec* o = &x->outs[sg->first_out + j]; @@ -2202,7 +2241,7 @@ static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { size_t sl = o->sectname ? 
strlen(o->sectname) : 0; if (sl > 16) sl = 16; if (sl) memcpy(sname, o->sectname, sl); - size_t gl = strlen(sg->name); /* segname must match */ + size_t gl = strlen(sg->name); /* segname must match */ if (gl > 16) gl = 16; memcpy(gname, sg->name, gl); mbuf_append(lc, sname, 16); @@ -2215,12 +2254,12 @@ static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { u32 al = 0; while ((1u << al) < a) ++al; mbuf_u32(lc, al); - mbuf_u32(lc, 0); /* reloff */ - mbuf_u32(lc, 0); /* nreloc */ + mbuf_u32(lc, 0); /* reloff */ + mbuf_u32(lc, 0); /* nreloc */ mbuf_u32(lc, o->flags); mbuf_u32(lc, o->reserved1); mbuf_u32(lc, o->reserved2); - mbuf_u32(lc, 0); /* reserved3 */ + mbuf_u32(lc, 0); /* reserved3 */ } (void)base; } @@ -2287,13 +2326,12 @@ void link_emit_macho(LinkImage* img, Writer* w) { mbuf_init(&lc, x.h); /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). */ - emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */ - emit_load_command_segment(&lc, &x, 1); /* TEXT */ + emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */ + emit_load_command_segment(&lc, &x, 1); /* TEXT */ if (x.segs[2].nsects > 0) - emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */ - if (x.segs[3].nsects > 0) - emit_load_command_segment(&lc, &x, 3); /* DATA */ - emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */ + emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */ + if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */ + emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */ /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */ mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS); @@ -2324,19 +2362,24 @@ void link_emit_macho(LinkImage* img, Writer* w) { u32 nextdef = (x.nsyms > nundef) ? 
x.nsyms - nundef - nlocal : 0; mbuf_u32(&lc, LC_DYSYMTAB); mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE); - mbuf_u32(&lc, 0); /* ilocalsym */ + mbuf_u32(&lc, 0); /* ilocalsym */ mbuf_u32(&lc, nlocal); mbuf_u32(&lc, nlocal); mbuf_u32(&lc, nextdef); mbuf_u32(&lc, nlocal + nextdef); mbuf_u32(&lc, nundef); - mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* tocoff, ntoc */ - mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* modtaboff, nmodtab */ - mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* tocoff, ntoc */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* modtaboff, nmodtab */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */ mbuf_u32(&lc, x.indirect_off); mbuf_u32(&lc, x.indirect.len / 4u); - mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* extreloff, nextrel */ - mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* locreloff, nlocrel */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* extreloff, nextrel */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* locreloff, nlocrel */ /* LC_LOAD_DYLINKER */ { @@ -2344,7 +2387,7 @@ void link_emit_macho(LinkImage* img, Writer* w) { u32 cmd_size = (u32)ALIGN_UP((u64)(12u + (u32)strlen(dyld) + 1u), 8u); mbuf_u32(&lc, LC_LOAD_DYLINKER); mbuf_u32(&lc, cmd_size); - mbuf_u32(&lc, 12u); /* name offset within cmd */ + mbuf_u32(&lc, 12u); /* name offset within cmd */ u32 wrote = mbuf_str(&lc, dyld, (u32)strlen(dyld)); (void)wrote; /* Pad to cmd_size. 
*/ @@ -2369,17 +2412,17 @@ void link_emit_macho(LinkImage* img, Writer* w) { /* LC_BUILD_VERSION */ mbuf_u32(&lc, LC_BUILD_VERSION); mbuf_u32(&lc, 24); - mbuf_u32(&lc, 1); /* PLATFORM_MACOS */ - mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */ - mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */ - mbuf_u32(&lc, 0); /* ntools */ + mbuf_u32(&lc, 1); /* PLATFORM_MACOS */ + mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */ + mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */ + mbuf_u32(&lc, 0); /* ntools */ /* LC_MAIN — entryoff is offset within __TEXT segment from its file * start (0). */ mbuf_u32(&lc, LC_MAIN); mbuf_u32(&lc, 24); - mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */ - mbuf_u64(&lc, 0); /* stacksize */ + mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */ + mbuf_u64(&lc, 0); /* stacksize */ /* LC_LOAD_DYLIB per dylib. */ for (u32 i = 0; i < x.ndylibs; ++i) { @@ -2389,10 +2432,10 @@ void link_emit_macho(LinkImage* img, Writer* w) { u32 cmd_start = lc.len; mbuf_u32(&lc, LC_LOAD_DYLIB); mbuf_u32(&lc, cmd_size); - mbuf_u32(&lc, 24u); /* name offset */ - mbuf_u32(&lc, 0); /* timestamp */ - mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */ - mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */ + mbuf_u32(&lc, 24u); /* name offset */ + mbuf_u32(&lc, 0); /* timestamp */ + mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */ + mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */ mbuf_str(&lc, nm ? nm : "", (u32)nl); while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0); } @@ -2435,10 +2478,10 @@ void link_emit_macho(LinkImage* img, Writer* w) { * + chained + exports_trie + symtab + dysymtab + dyld + uuid + * build_version + main + nDylibs + fn_starts + data_in_code + * codesig. 
*/ - ncmds += 2; /* PAGEZERO + TEXT */ + ncmds += 2; /* PAGEZERO + TEXT */ if (x.segs[2].nsects > 0) ncmds++; if (x.segs[3].nsects > 0) ncmds++; - ncmds++; /* LINKEDIT */ + ncmds++; /* LINKEDIT */ ncmds += 11 + x.ndylibs; /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version, * main, fn_starts, data_in_code, codesig) = 11 */ @@ -2458,7 +2501,7 @@ void link_emit_macho(LinkImage* img, Writer* w) { if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS; mbuf_u32(&file, mh_flags); } - mbuf_u32(&file, 0); /* reserved */ + mbuf_u32(&file, 0); /* reserved */ mbuf_append(&file, lc.data, lc.len); /* Pad to first section's file offset. */ @@ -2477,8 +2520,8 @@ void link_emit_macho(LinkImage* img, Writer* w) { } else { LinkSection* ls = &img->sections[m->link_sec_id - 1u]; u32 segid = ls->segment_id; - u8* base = (segid != LINK_SEG_NONE) ? img->segment_bytes[segid - 1u] - : NULL; + u8* base = + (segid != LINK_SEG_NONE) ? img->segment_bytes[segid - 1u] : NULL; if (base && ls->size) { mbuf_append(&file, base + ls->input_offset, (u32)ls->size); } else if (ls->size) { @@ -2529,7 +2572,7 @@ void link_emit_macho(LinkImage* img, Writer* w) { mbuf_fini(&x.fn_starts); mbuf_fini(&x.data_in_code); mbuf_fini(&x.codesig); - if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */ + if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */ if (x.dylibs) x.h->free(x.h, x.dylibs, 0); if (x.sym_to_imp) x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size); diff --git a/src/link/link_resolve.c b/src/link/link_resolve.c @@ -59,6 +59,11 @@ static int bind_strength(u8 bind) { } } +static int obj_sym_is_logical_undef(const ObjSym* s) { + return s && s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS && + s->kind != SK_COMMON; +} + void link_resolve_symbols(Linker* l, LinkImage* img) { u32 ii; for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { @@ -86,8 +91,7 @@ void link_resolve_symbols(Linker* l, LinkImage* img) { LinkSymbol rec; LinkSymId 
existing; { - int is_logical_undef = (s->section_id == OBJ_SEC_NONE) && - (s->kind != SK_ABS) && (s->kind != SK_COMMON); + int is_logical_undef = obj_sym_is_logical_undef(s); if (is_logical_undef && !s->referenced && (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { continue; @@ -494,7 +498,8 @@ void link_gc_drop_dead_globals(Linker* l, LinkImage* img, const GcLive* g) { /* ---- archive ingestion ---- */ -static void include_archive_member(Linker* l, LinkArchiveMember* mem) { +static void include_archive_member(Linker* l, const LinkArchive* ar, + LinkArchiveMember* mem) { LinkInput* in; LinkInputId id; u32 idx; @@ -505,24 +510,28 @@ static void include_archive_member(Linker* l, LinkArchiveMember* mem) { id = (LinkInputId)(idx + 1u); in->id = id; in->kind = LINK_INPUT_OBJ_BYTES; + in->order = ar->order; in->obj = mem->obj; in->name = mem->name; mem->included = 1; mem->obj = NULL; } -static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs) { +static void scan_presence_before(Linker* l, u32 max_order, SymHash* defined, + SymHash* undefs) { u32 ii; ObjSymIter* it; ObjSymEntry e; for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + LinkInput* in = LinkInputs_at(&l->inputs, ii); + ObjBuilder* ob = in->obj; + if (!ob || in->order > max_order) continue; it = obj_symiter_new(ob); while (obj_symiter_next(it, &e)) { const ObjSym* s = e.sym; if (s->name == 0) continue; if (s->bind == SB_LOCAL) continue; - if (s->kind == SK_UNDEF) + if (obj_sym_is_logical_undef(s)) symhash_set(undefs, s->name, 1u); else symhash_set(defined, s->name, 1u); @@ -531,12 +540,14 @@ static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs) { } } -static int inputs_have_defined_ifunc(Linker* l) { +static int inputs_have_defined_ifunc_before(Linker* l, u32 max_order) { u32 ii; ObjSymIter* it; ObjSymEntry e; for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + 
LinkInput* in = LinkInputs_at(&l->inputs, ii); + ObjBuilder* ob = in->obj; + if (!ob || in->order > max_order) continue; it = obj_symiter_new(ob); while (obj_symiter_next(it, &e)) { const ObjSym* s = e.sym; @@ -578,37 +589,35 @@ void link_ingest_archives(Linker* l) { LinkArchive* ar = LinkArchives_at(&l->archives, a); if (!ar->whole_archive) continue; for (m = 0; m < ar->nmembers; ++m) - include_archive_member(l, &ar->members[m]); + include_archive_member(l, ar, &ar->members[m]); } - Sym want_ifunc_init = 0; - if (l->emit_static_exe && inputs_have_defined_ifunc(l)) { - want_ifunc_init = pool_intern_cstr(l->c->global, "__cfree_ifunc_init"); - } + for (a = 0; a < LinkArchives_count(&l->archives); ++a) { + LinkArchive* ar = LinkArchives_at(&l->archives, a); + Sym want_ifunc_init = 0; + if (ar->whole_archive) continue; + if (l->emit_static_exe && inputs_have_defined_ifunc_before(l, ar->order)) + want_ifunc_init = pool_intern_cstr(l->c->global, "__cfree_ifunc_init"); + for (;;) { + SymHash defined, undefs; + int changed = 0; + symhash_init(&defined, l->heap); + symhash_init(&undefs, l->heap); + scan_presence_before(l, ar->order, &defined, &undefs); + if (want_ifunc_init != 0 && + symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE) + symhash_set(&undefs, want_ifunc_init, 1u); - for (;;) { - SymHash defined, undefs; - int changed = 0; - symhash_init(&defined, l->heap); - symhash_init(&undefs, l->heap); - scan_presence(l, &defined, &undefs); - if (want_ifunc_init != 0 && - symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE) - symhash_set(&undefs, want_ifunc_init, 1u); - - for (a = 0; a < LinkArchives_count(&l->archives); ++a) { - LinkArchive* ar = LinkArchives_at(&l->archives, a); - if (ar->whole_archive) continue; for (m = 0; m < ar->nmembers; ++m) { LinkArchiveMember* mem = &ar->members[m]; if (mem->included) continue; if (!member_satisfies(mem, &defined, &undefs)) continue; - include_archive_member(l, mem); + include_archive_member(l, ar, mem); changed = 1; } 
+ symhash_fini(&defined); + symhash_fini(&undefs); + if (!changed) break; } - symhash_fini(&defined); - symhash_fini(&undefs); - if (!changed) break; } } diff --git a/test/link/cases/37_macho_ro_const_abs64/a.aa64.S b/test/link/cases/37_macho_ro_const_abs64/a.aa64.S @@ -0,0 +1,17 @@ +.section __TEXT,__const +.p2align 3 +_ro_const_ptr: + .quad _ro_const_value + +.p2align 2 +_ro_const_value: + .long 7 + +.text +.globl _test_main +.p2align 2 +_test_main: + adrp x8, _ro_const_ptr@PAGE + ldr x8, [x8, _ro_const_ptr@PAGEOFF] + ldr w0, [x8] + ret diff --git a/test/link/cases/37_macho_ro_const_abs64/expected b/test/link/cases/37_macho_ro_const_abs64/expected @@ -0,0 +1 @@ +7 diff --git a/test/link/cases/37_macho_ro_const_abs64/j_targets b/test/link/cases/37_macho_ro_const_abs64/j_targets @@ -0,0 +1 @@ + diff --git a/test/link/cases/37_macho_ro_const_abs64/targets b/test/link/cases/37_macho_ro_const_abs64/targets @@ -0,0 +1 @@ +aa64-macho