commit e8d3d15a25a1195b1b4cfa3d4f5bc513f3483a2b
parent db08235c3ef5e143f15c843c191dd2d9e08f446b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 11:18:28 -0700
hex2pp
Diffstat:
| A | docs/HEX2pp.md | | | 170 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | hex2pp/hex2pp.c | | | 750 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 920 insertions(+), 0 deletions(-)
diff --git a/docs/HEX2pp.md b/docs/HEX2pp.md
@@ -0,0 +1,170 @@
+# hex2++
+
+A small, byte-oriented assembler/linker that takes hex source with labels and
+references and emits a flat binary. Implemented in P1; used by `cc.scm` and
+the P1 backends as the final stage of the `M1pp → M1 → hex2++` toolchain.
+
+## Invocation
+
+```
+hex2++ (-f|--file) FILE [(-f|--file) FILE ...]
+ [-o|--output OUT]
+ [-B|--base-address ADDR]
+ [--big-endian | --little-endian]
+ [-b|--binary] # default is hex
+ [--non-executable]
+```
+
+Output is one flat binary written from `Base_Address` upward. Multiple `-f`
+files are concatenated in argv order. Unless `--non-executable` is set, the
+output is `chmod 0750`'d when it is a regular file.
+
+There is no per-target configuration. Any target-specific encoding (RISC-V
+bitfield-scattered immediates, native branch displacements, etc.) is the
+responsibility of the upstream M1pp layer, which packs full instruction
+words at expansion time. hex2++ sees only contiguous-byte values.
+
+## Lexical structure
+
+- **Whitespace** — space, tab, newline; separates tokens, otherwise ignored.
+- **Comments** — `#` or `;` to end of line.
+- **Byte mode** — chosen once at invocation:
+ - `HEX` (default): two hex digits → one byte. Digits `0-9 a-f A-F`.
+ - `BINARY` (`-b`): eight binary digits → one byte. Digits `0 1`.
+
+Bytes within a token may be separated by whitespace freely; only digit count
+matters.
+
+Active characters:
+
+```
+0-9 a-f A-F hex digits (HEX mode)
+0-1 binary digits (BINARY mode)
+: label definition
+. (+kw) directive (.align, .fill, .scope, .endscope)
+! @ $ ~ % & label reference
+- label arithmetic in references
+# ; line comment
+ws token separator
+```
+
+## Labels
+
+```
+:NAME define label NAME at the current emit position (ip)
+```
+
+Label names are tokens terminated by whitespace, `-`, or the start of a
+comment (`#` or `;`). Labels may be referenced before they are defined;
+forward references resolve in pass 2.
+
+The label namespace is global except that names beginning with `.` are
+*local* to the enclosing `.scope`. Local labels are distinguished from
+directives by the leading character of the token: `:.NAME` is a local
+definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare
+`.NAME` (no leading `:` or sigil) is a directive.
+
+```
+.scope
+ :.L1
+ ...
+ &.L1
+.endscope
+```
+
+- `.scope` directives may not nest.
+- Non-dotted labels defined inside a `.scope` remain global.
+- Dot-prefixed labels outside any `.scope` are an error.
+
+## Label references
+
+A reference is a single sigil character followed by a label expression:
+
+| Sigil | Width | Form | Range |
+|-------|-------|------|------------------------|
+| `!` | 1 B | rel | `-128..127` |
+| `@` | 2 B | rel | `-32768..32767` |
+| `$` | 2 B | abs | `0..65535` |
+| `~` | 3 B | rel | `-2^23..2^23-1` |
+| `%` | 4 B | rel | unchecked |
+| `&` | 4 B | abs | unchecked |
+
+- "rel" emits `target - base`, where `base` is `ip` immediately after the
+ reference's bytes are accounted for.
+- "abs" emits the target's absolute address (which includes `Base_Address`).
+- Multi-byte values are emitted little-endian unless `--big-endian` is set.
+
+The label expression takes one of two forms:
+
+```
+SIGIL LABEL # plain reference
+SIGIL LABEL - OTHER # emit target(LABEL) - target(OTHER)
+```
+
+The `LABEL - OTHER` form overrides the default base with another label, and
+applies uniformly to all sigils. Both labels must be defined somewhere in
+the input. Range checks apply identically to plain and arithmetic forms.
+
+Only one subtraction per reference; no addition, nesting, or
+parenthesization.
+
+Examples:
+
+```
+# jump table entries
+:jt
+ &case0-jt &case1-jt &case2-jt
+
+# string length prefix
+:s_begin
+ 68 65 6c 6c 6f # "hello" as raw bytes; hex2++ has no string literals
+:s_end
+ &s_end-s_begin
+```
+
+## Directives
+
+### `.align N [PATTERN]`
+
+```
+.align N # pad to N-byte boundary with zero bytes
+.align N PATTERN # pad with the given byte/word pattern
+```
+
+- `N` is a positive power-of-two decimal integer.
+- `PATTERN`, if present, is a literal of one or more whole bytes in the
+ current byte mode (e.g. in HEX mode: `00`, `90`, `d503201f`). The pattern
+ is repeated and rotated as needed to fill the gap.
+- If `ip` is already aligned, no bytes are emitted.
+
+The pad pattern is supplied by whichever upstream layer knows the target
+(typically a per-backend M1pp macro). hex2++ stays target-neutral.
+
+### `.fill N B`
+
+```
+.fill N B # emit N copies of byte B
+```
+
+- `N` is a non-negative decimal integer.
+- `B` is one byte literal in the current byte mode.
+
+### `.scope` / `.endscope`
+
+See [Labels](#labels).
+
+## Implementation outline
+
+Two passes:
+
+- **Pass 1** — read every input file, advancing `ip` and recording label
+ definitions. `.align` and `.fill` advance `ip` deterministically;
+ `.scope` / `.endscope` enter and leave a scope, setting and clearing the
+ current scope id (scopes do not nest, so no stack is needed).
+- **Pass 2** — re-read, emit bytes, resolve references.
+
+The label table carries `(name, target_ip, scope_id)` entries. Lookup for a
+dotted name compares both name and current scope id; lookup for a non-dotted
+name ignores scope.
+
+Both labels in `LABEL-OTHER` have known addresses by the start of pass 2, so
+the subtraction is a single operation at emit time. No third pass is
+required.
diff --git a/hex2pp/hex2pp.c b/hex2pp/hex2pp.c
@@ -0,0 +1,750 @@
+/*
+ * hex2pp.c -- reference C implementation of hex2++.
+ *
+ * See docs/HEX2pp.md for the spec. Brief summary:
+ *
+ * Inputs are concatenated, scanned in two passes. Pass 1 records label
+ * definitions while advancing a position counter (ip). Pass 2 emits
+ * bytes, resolving label references against the table built in pass 1.
+ *
+ * Active syntax:
+ * digits in current byte mode -> raw bytes (HEX or BINARY)
+ * :NAME -> label definition
+ * SIGIL NAME [- OTHER] -> label reference (! @ $ ~ % &)
+ * .align N [PATTERN] -> pad to N-byte boundary
+ * .fill N B -> N copies of byte B
+ * .scope / .endscope -> local-label scope (no nesting)
+ * # ... / ; ... -> line comment
+ *
+ * Multi-byte reference values are emitted little-endian by default.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+/* Fixed capacities: every table in this file is statically sized. */
+#define MAX_FILES 64 /* entries in inputs[] */
+#define MAX_INPUT_BYTES (16 * 1024 * 1024) /* largest single input file accepted by load_input */
+#define MAX_OUTPUT_BYTES (128 * 1024 * 1024) /* size of output_buf[] */
+#define MAX_LABELS (1 << 20) /* entries in labels[] */
+#define MAX_TEXT (8 * 1024 * 1024) /* size of text_buf[], the label-name pool */
+#define MAX_TOKEN 4096 /* size of on-stack name and pattern buffers */
+
+/* Values of byte_mode: how many digits, and of which radix, form one byte. */
+enum { HEX_MODE, BINARY_MODE };
+
+/* One input file, fully loaded into memory by load_input(). */
+struct InFile {
+ const char *path; /* path as given on the command line (not copied) */
+ char *buf; /* malloc'ed contents; buf[len] is a NUL terminator */
+ int len; /* number of content bytes in buf */
+};
+
+/* One label definition recorded during pass 1. */
+struct Label {
+ int name_off; /* offset of the name inside text_buf (see intern) */
+ int name_len; /* name length in bytes, excluding the NUL */
+ long long target_ip; /* value of ip at the definition; base_address not included */
+ int scope_id; /* 0 = global */
+};
+
+/* Input files in command-line order; filled by load_input(). */
+static struct InFile inputs[MAX_FILES];
+static int input_count;
+
+/* Pool of NUL-terminated label names; text_used is the next free offset. */
+static char text_buf[MAX_TEXT];
+static int text_used;
+
+/* Label table built in pass 1 and searched in pass 2. */
+static struct Label labels[MAX_LABELS];
+static int label_count;
+
+/* Bytes emitted in pass 2; written to output_path at the end of main(). */
+static unsigned char output_buf[MAX_OUTPUT_BYTES];
+static long long output_used;
+
+static long long ip; /* emit position, reset to 0 at the start of each pass */
+static long long base_address; /* added to absolute references (-B) */
+static int byte_mode = HEX_MODE; /* HEX_MODE or BINARY_MODE (-b) */
+static int big_endian; /* nonzero: multi-byte references are emitted MSB first */
+static int non_executable; /* nonzero: skip the chmod 0750 on the output */
+static const char *output_path = "a.out";
+
+static int current_scope; /* scope id used for dotted labels; 0 outside .scope */
+static int scope_seq; /* incremented by each .scope to produce a fresh id */
+static int in_scope; /* nonzero between .scope and .endscope */
+
+static int pass; /* 1 or 2 */
+static const char *cur_path; /* file being scanned, for die(); NULL before scanning starts */
+static int cur_line; /* 1-based line in cur_path, advanced by eatc() */
+
+/* --- error reporting ---------------------------------------------------- */
+
+/*
+ * Print a fatal diagnostic to stderr and exit with status 1.
+ *
+ * The message is prefixed with "PATH:LINE: hex2pp: " while a file is being
+ * scanned (cur_path set) and with plain "hex2pp: " otherwise. fmt and the
+ * variadic arguments follow printf conventions; a newline is appended.
+ * Never returns.
+ */
+static void die(const char *fmt, ...)
+{
+    va_list args;
+
+    if (cur_path == NULL) {
+        fputs("hex2pp: ", stderr);
+    } else {
+        fprintf(stderr, "%s:%d: hex2pp: ", cur_path, cur_line);
+    }
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fputc('\n', stderr);
+    exit(1);
+}
+
+/* --- text / label table ------------------------------------------------- */
+
+/*
+ * Copy the len bytes at s into the name pool, NUL-terminate the copy, and
+ * return its starting offset within text_buf. Dies with "text pool overflow"
+ * when the pool cannot hold len + 1 more bytes.
+ */
+static int intern(const char *s, int len)
+{
+    const int start = text_used;
+    const int needed = len + 1; /* name bytes plus the terminating NUL */
+    char *dst;
+
+    if (start + needed > MAX_TEXT) {
+        die("text pool overflow");
+    }
+
+    dst = text_buf + start;
+    memcpy(dst, s, (size_t)len);
+    dst[len] = '\0';
+    text_used = start + needed;
+    return start;
+}
+
+/*
+ * Return nonzero when label L's stored name is exactly the len bytes at s.
+ * Lengths are compared first, so memcmp only runs on equal-length names.
+ */
+static int name_eq(const struct Label *L, const char *s, int len)
+{
+    if (L->name_len != len) {
+        return 0;
+    }
+    return memcmp(text_buf + L->name_off, s, (size_t)len) == 0;
+}
+
+/*
+ * Record a label named s[0..len) in scope scope_id at the current ip.
+ *
+ * Dies if a label with the same name already exists in the same scope id,
+ * or if the label table is full. The name is copied into the text pool.
+ */
+static void define_label(const char *s, int len, int scope_id)
+{
+    struct Label *slot;
+    int idx;
+
+    /* Linear scan: names only clash within the same scope id. */
+    for (idx = 0; idx < label_count; idx++) {
+        const struct Label *seen = &labels[idx];
+        if (seen->scope_id != scope_id) {
+            continue;
+        }
+        if (name_eq(seen, s, len)) {
+            die("duplicate label '%.*s'", len, s);
+        }
+    }
+
+    if (label_count >= MAX_LABELS) {
+        die("too many labels");
+    }
+
+    slot = &labels[label_count];
+    slot->name_off = intern(s, len);
+    slot->name_len = len;
+    slot->target_ip = ip;
+    slot->scope_id = scope_id;
+    label_count++;
+}
+
+/*
+ * Return the target_ip of the label named s[0..len).
+ *
+ * A name starting with '.' is searched in the current scope id; any other
+ * name is searched among global labels (scope id 0) only. Dies with an
+ * "undefined ..." message when no matching label exists.
+ */
+static long long lookup_label(const char *s, int len)
+{
+    const int is_local = (len > 0 && s[0] == '.');
+    const int want_scope = is_local ? current_scope : 0;
+    const struct Label *p = labels;
+    const struct Label *const end = labels + label_count;
+
+    for (; p != end; p++) {
+        if (p->scope_id == want_scope && name_eq(p, s, len)) {
+            return p->target_ip;
+        }
+    }
+
+    if (is_local) {
+        die("undefined local label '%.*s'", len, s);
+    } else {
+        die("undefined label '%.*s'", len, s);
+    }
+    return 0; /* unreachable */
+}
+
+/* --- I/O ---------------------------------------------------------------- */
+
+/*
+ * Account for one output byte and advance ip.
+ *
+ * In pass 2 the low 8 bits of b are appended to output_buf. In pass 1
+ * nothing is stored, but ip is held to the same MAX_OUTPUT_BYTES limit so
+ * that an oversized `.fill` / `.align` count fails immediately instead of
+ * looping through pass 1 and only being rejected in pass 2. Both passes
+ * advance ip identically, so the limit trips at the same input position.
+ */
+static void emit_byte(unsigned b)
+{
+    if (pass != 2) {
+        if (ip >= MAX_OUTPUT_BYTES) {
+            die("output overflow");
+        }
+        ip++;
+        return;
+    }
+    if (output_used >= MAX_OUTPUT_BYTES) {
+        die("output overflow");
+    }
+    output_buf[output_used++] = (unsigned char)b;
+    ip++;
+}
+
+/*
+ * Emit the low `width` bytes (1..8) of v via emit_byte().
+ *
+ * When range_check is nonzero, v must lie in [lo, hi] or the program dies.
+ * Bytes go out least-significant first unless big_endian is set, in which
+ * case the order is reversed.
+ */
+static void emit_value(long long v, int width, long long lo, long long hi, int range_check)
+{
+    unsigned char le[8];
+    unsigned long long bits;
+    int k;
+
+    if (range_check && (v < lo || v > hi)) {
+        die("reference out of range: value=%lld, allowed=[%lld,%lld]", v, lo, hi);
+    }
+    if (width < 1 || width > 8) {
+        die("internal: bad reference width %d", width);
+    }
+
+    /* Peel off bytes from the least-significant end. */
+    bits = (unsigned long long)v;
+    for (k = 0; k < width; k++) {
+        le[k] = (unsigned char)(bits & 0xff);
+        bits >>= 8;
+    }
+
+    if (big_endian) {
+        for (k = width - 1; k >= 0; k--) {
+            emit_byte(le[k]);
+        }
+    } else {
+        for (k = 0; k < width; k++) {
+            emit_byte(le[k]);
+        }
+    }
+}
+
+/* --- per-file scanner state -------------------------------------------- */
+
+/* Read cursor over one in-memory input buffer. */
+struct Scanner {
+ const char *buf; /* start of the buffer being scanned */
+ int len; /* number of bytes in buf */
+ int pos; /* index of the next unread byte; pos == len means end of input */
+};
+
+/*
+ * Consume and return the next byte (as an unsigned char value), or -1 at
+ * end of input. Consuming a newline advances cur_line.
+ */
+static int eatc(struct Scanner *s)
+{
+    int ch = -1;
+
+    if (s->pos < s->len) {
+        ch = (unsigned char)s->buf[s->pos];
+        s->pos++;
+        if (ch == '\n') {
+            cur_line++;
+        }
+    }
+    return ch;
+}
+
+/* Return 1 for space, tab, newline, CR, form feed or vertical tab; else 0. */
+static int is_space_any(int c)
+{
+    switch (c) {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+    case '\f':
+    case '\v':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Advance past any run of whitespace and `#` / `;` line comments, stopping
+ * at the first other byte or at end of input. A comment is skipped up to,
+ * but not including, its newline; the newline is then consumed as
+ * whitespace through eatc() so that cur_line stays accurate.
+ */
+static void skip_ws_and_comments(struct Scanner *s)
+{
+    for (;;) {
+        int ch;
+
+        if (s->pos >= s->len) {
+            return;
+        }
+        ch = (unsigned char)s->buf[s->pos];
+
+        if (ch == '#' || ch == ';') {
+            do {
+                s->pos++;
+            } while (s->pos < s->len && s->buf[s->pos] != '\n');
+            continue;
+        }
+        if (!is_space_any(ch)) {
+            return;
+        }
+        eatc(s);
+    }
+}
+
+/* --- byte-mode digit handling ----------------------------------------- */
+
+/* Digits that make up one byte in the current mode: 2 in HEX, 8 in BINARY. */
+static int byte_digit_count(void)
+{
+    return (byte_mode == HEX_MODE) ? 2 : 8;
+}
+
+/*
+ * Return 1 when c is a digit of the current byte mode: 0-9 a-f A-F in HEX
+ * mode, 0 or 1 in BINARY mode. Returns 0 otherwise.
+ */
+static int is_byte_digit(int c)
+{
+    if (byte_mode != HEX_MODE) {
+        return c == '0' || c == '1';
+    }
+    if (c >= '0' && c <= '9') {
+        return 1;
+    }
+    if (c >= 'a' && c <= 'f') {
+        return 1;
+    }
+    return c >= 'A' && c <= 'F';
+}
+
+/*
+ * Numeric value (0..15) of the hex digit c, accepting either letter case.
+ * Returns -1 when c is not a hex digit. The byte mode is not consulted.
+ */
+static int byte_digit_value(int c)
+{
+    int value = -1;
+
+    if ('0' <= c && c <= '9') {
+        value = c - '0';
+    } else if ('a' <= c && c <= 'f') {
+        value = (c - 'a') + 10;
+    } else if ('A' <= c && c <= 'F') {
+        value = (c - 'A') + 10;
+    }
+    return value;
+}
+
+/*
+ * Parse a directive argument: a contiguous run of byte-mode digits with no
+ * whitespace inside (the first non-digit ends the argument).
+ *
+ *   allow_multi == 0: exactly one byte is expected and stored in *out.
+ *                     Further digits directly after that byte are rejected
+ *                     with "byte literal: too many digits" rather than
+ *                     being left behind for the byte-stream parser.
+ *   allow_multi != 0: every complete byte is appended to buf (capacity
+ *                     bufmax); exceeding it dies with "pattern too large".
+ *
+ * Dies when the run ends in the middle of a byte or contains no byte at
+ * all. Returns the number of bytes produced and, if outlen is non-NULL,
+ * stores the same count there.
+ */
+static int parse_one_byte_literal(struct Scanner *s, unsigned char *out, int allow_multi, unsigned char *buf, int bufmax, int *outlen)
+{
+    int need = byte_digit_count();
+    int acc = 0;
+    int have = 0;
+    int produced = 0;
+    int c;
+
+    while (s->pos < s->len) {
+        c = (unsigned char)s->buf[s->pos];
+        if (!is_byte_digit(c)) break;
+        /* Single-byte form: a digit following a complete byte is excess. */
+        if (!allow_multi && produced > 0) die("byte literal: too many digits");
+        s->pos++;
+        if (byte_mode == HEX_MODE) acc = (acc << 4) | byte_digit_value(c);
+        else acc = (acc << 1) | (c - '0');
+        have++;
+        if (have == need) {
+            if (allow_multi) {
+                if (produced >= bufmax) die("pattern too large");
+                buf[produced++] = (unsigned char)(acc & 0xff);
+            } else {
+                *out = (unsigned char)(acc & 0xff);
+                produced = 1;
+            }
+            acc = 0;
+            have = 0;
+        }
+    }
+    if (have != 0) die("byte literal: incomplete digits (%d left over)", have);
+    if (produced == 0) die("expected byte literal");
+    if (outlen) *outlen = produced;
+    return produced;
+}
+
+/*
+ * Consume a free-form run of byte digits and emit each completed byte.
+ * Whitespace and `#` / `;` comments may appear anywhere between digits,
+ * including in the middle of a byte. Scanning stops at the first byte that
+ * is neither a digit, whitespace nor a comment start, or at end of input;
+ * a partially accumulated byte at that point is a fatal error.
+ */
+static void parse_byte_stream(struct Scanner *s)
+{
+    const int digits_per_byte = byte_digit_count();
+    const int shift = (byte_mode == HEX_MODE) ? 4 : 1;
+    int value = 0;
+    int pending = 0;
+
+    while (s->pos < s->len) {
+        int ch = (unsigned char)s->buf[s->pos];
+
+        if (is_space_any(ch)) {
+            eatc(s);
+            continue;
+        }
+        if (ch == '#' || ch == ';') {
+            while (s->pos < s->len && s->buf[s->pos] != '\n') {
+                s->pos++;
+            }
+            continue;
+        }
+        if (!is_byte_digit(ch)) {
+            break;
+        }
+
+        s->pos++;
+        /* For '0' and '1' byte_digit_value() equals ch - '0', so one
+         * expression serves both modes. */
+        value = (value << shift) | byte_digit_value(ch);
+        pending++;
+        if (pending == digits_per_byte) {
+            emit_byte((unsigned)(value & 0xff));
+            value = 0;
+            pending = 0;
+        }
+    }
+
+    if (pending != 0) {
+        die("byte stream: incomplete digits at end of run (%d left over)", pending);
+    }
+}
+
+/* --- name / token reading --------------------------------------------- */
+
+/*
+ * Return 1 when c ends a label name: end of input (c < 0), whitespace,
+ * '-', or a comment start ('#' or ';'). Returns 0 otherwise.
+ */
+static int is_name_terminator(int c)
+{
+    return c < 0
+        || is_space_any(c)
+        || c == '-'
+        || c == '#'
+        || c == ';';
+}
+
+/*
+ * Copy a label name from the scanner into out (capacity max bytes, not
+ * NUL-terminated) and return its length. Reading stops, without consuming,
+ * at the first name terminator or at end of input. Dies when the name is
+ * empty or longer than max.
+ */
+static int read_name(struct Scanner *s, char *out, int max)
+{
+    int count;
+
+    for (count = 0; s->pos < s->len; count++, s->pos++) {
+        int ch = (unsigned char)s->buf[s->pos];
+        if (is_name_terminator(ch)) {
+            break;
+        }
+        if (count >= max) {
+            die("name too long");
+        }
+        out[count] = (char)ch;
+    }
+
+    if (count == 0) {
+        die("expected label name");
+    }
+    return count;
+}
+
+/*
+ * Read an unsigned decimal integer (used for directive count arguments).
+ *
+ * Consumes the leading run of '0'..'9' and returns its value. Dies with
+ * "expected decimal integer" when there is no digit, and with "decimal
+ * integer too large" when the value does not fit in a long long (the
+ * accumulation would otherwise overflow a signed type, which is undefined
+ * behaviour).
+ */
+static long long read_decimal(struct Scanner *s)
+{
+    long long v = 0;
+    int saw = 0;
+
+    while (s->pos < s->len) {
+        int c = (unsigned char)s->buf[s->pos];
+        int digit;
+
+        if (c < '0' || c > '9') break;
+        digit = c - '0';
+        /* Equivalent to v * 10 + digit > LLONG_MAX, without overflowing. */
+        if (v > (LLONG_MAX - digit) / 10) die("decimal integer too large");
+        v = v * 10 + digit;
+        saw = 1;
+        s->pos++;
+    }
+    if (!saw) die("expected decimal integer");
+    return v;
+}
+
+/* --- references ------------------------------------------------------- */
+
+/* Encoding rules for one reference sigil; produced by sigil_info(). */
+struct SigilInfo {
+ int width; /* number of bytes emitted */
+ int is_rel; /* nonzero: value is relative to ip after the reference; zero: absolute */
+ long long lo; /* smallest permitted value when range_check is set */
+ long long hi; /* largest permitted value when range_check is set */
+ int range_check; /* nonzero: enforce [lo, hi] */
+};
+
+/*
+ * Return the width, relative/absolute form and range limits for reference
+ * sigil c (one of ! @ $ ~ % &). Any other character is an internal error
+ * and terminates the program.
+ */
+static struct SigilInfo sigil_info(int c)
+{
+    static const struct {
+        int sigil;
+        struct SigilInfo info; /* width, is_rel, lo, hi, range_check */
+    } table[] = {
+        { '!', { 1, 1, -128, 127, 1 } },
+        { '@', { 2, 1, -32768, 32767, 1 } },
+        { '$', { 2, 0, 0, 65535, 1 } },
+        { '~', { 3, 1, -(1LL << 23), (1LL << 23) - 1, 1 } },
+        { '%', { 4, 1, 0, 0, 0 } },
+        { '&', { 4, 0, 0, 0, 0 } },
+    };
+    struct SigilInfo none = {0};
+    size_t k;
+
+    for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
+        if (table[k].sigil == c) {
+            return table[k].info;
+        }
+    }
+    die("internal: bad sigil 0x%02x", c);
+    return none; /* unreachable */
+}
+
+/*
+ * Handle one label reference; the sigil character has already been
+ * consumed and is passed in `sigil`.
+ *
+ * Syntax: LABEL or LABEL-OTHER, written tight against the sigil with no
+ * whitespace. In pass 1 only ip is advanced by the reference width. In
+ * pass 2 the value is computed and emitted:
+ *   LABEL-OTHER : target(LABEL) - target(OTHER), for every sigil
+ *   relative    : target(LABEL) - (ip + width)
+ *   absolute    : target(LABEL) + base_address
+ */
+static void process_reference(struct Scanner *s, int sigil)
+{
+    const struct SigilInfo info = sigil_info(sigil);
+    char target_name[MAX_TOKEN];
+    char base_name[MAX_TOKEN];
+    int target_len;
+    int base_len = 0;
+    int subtract = 0;
+    long long target;
+    long long result;
+
+    if (s->pos >= s->len || is_name_terminator((unsigned char)s->buf[s->pos])) {
+        die("sigil '%c' not followed by label name", sigil);
+    }
+    target_len = read_name(s, target_name, sizeof(target_name));
+
+    /* Optional "-OTHER" suffix. */
+    if (s->pos < s->len && s->buf[s->pos] == '-') {
+        s->pos++;
+        if (s->pos >= s->len || is_name_terminator((unsigned char)s->buf[s->pos])) {
+            die("'-' must be followed by label name");
+        }
+        base_len = read_name(s, base_name, sizeof(base_name));
+        subtract = 1;
+    }
+
+    if (pass == 1) {
+        ip += info.width;
+        return;
+    }
+
+    target = lookup_label(target_name, target_len);
+    if (subtract) {
+        result = target - lookup_label(base_name, base_len);
+    } else if (info.is_rel) {
+        result = target - (ip + info.width);
+    } else {
+        result = target + base_address;
+    }
+    emit_value(result, info.width, info.lo, info.hi, info.range_check);
+}
+
+/* --- directives ------------------------------------------------------- */
+
+/*
+ * Read the alphabetic directive keyword that follows an already-consumed
+ * '.' into out (capacity max, not NUL-terminated) and return its length.
+ * Stops at the first non-alphabetic byte. Dies when the keyword is empty
+ * or longer than max.
+ */
+static int read_directive_name(struct Scanner *s, char *out, int max)
+{
+    int count;
+
+    for (count = 0; s->pos < s->len; count++, s->pos++) {
+        int ch = (unsigned char)s->buf[s->pos];
+        if (!isalpha(ch)) {
+            break;
+        }
+        if (count >= max) {
+            die("directive name too long");
+        }
+        out[count] = (char)ch;
+    }
+
+    if (count == 0) {
+        die("expected directive name after '.'");
+    }
+    return count;
+}
+
+/*
+ * Skip blanks and comments without leaving the current line.
+ *
+ * Directive arguments must not cross a newline (otherwise `.align 8`
+ * followed by `cc` on the next line would take `cc` as its pattern), so
+ * '\n' is never consumed here. A comment is skipped up to, but not
+ * including, its newline.
+ */
+static void skip_inline_ws(struct Scanner *s)
+{
+    while (s->pos < s->len) {
+        switch ((unsigned char)s->buf[s->pos]) {
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\f':
+        case '\v':
+            s->pos++;
+            break;
+        case '#':
+        case ';':
+            while (s->pos < s->len && s->buf[s->pos] != '\n') {
+                s->pos++;
+            }
+            break;
+        default:
+            return;
+        }
+    }
+}
+
+/*
+ * Handle `.align N [PATTERN]` (the keyword has already been consumed).
+ *
+ * N must be a positive power of two. Pads from ip up to the next multiple
+ * of N, emitting zero bytes, or the bytes of PATTERN cycled from its first
+ * byte when a pattern is present on the same line. Emits nothing when ip
+ * is already aligned.
+ *
+ * Padding that would run past MAX_OUTPUT_BYTES is rejected up front; this
+ * keeps a huge N from looping for an impractically long time in pass 1,
+ * where no output buffer limit would otherwise stop it.
+ */
+static void do_align(struct Scanner *s)
+{
+    unsigned char patbuf[MAX_TOKEN];
+    int patlen = 0; /* stays 0 when no pattern is given */
+    long long N;
+    long long target;
+    long long pad;
+    long long i;
+
+    skip_inline_ws(s);
+    N = read_decimal(s);
+    if (N <= 0 || (N & (N - 1)) != 0) {
+        die(".align: N must be a positive power of two (got %lld)", N);
+    }
+
+    /* Optional pattern: present iff the next byte on this line is a digit. */
+    skip_inline_ws(s);
+    if (s->pos < s->len && is_byte_digit((unsigned char)s->buf[s->pos])) {
+        parse_one_byte_literal(s, NULL, 1, patbuf, (int)sizeof(patbuf), &patlen);
+    }
+
+    target = (ip + N - 1) & ~(N - 1);
+    pad = target - ip;
+    if (pad > 0 && target > MAX_OUTPUT_BYTES) {
+        die("output overflow");
+    }
+
+    for (i = 0; i < pad; i++) {
+        emit_byte(patlen > 0 ? patbuf[i % patlen] : 0);
+    }
+}
+
+/*
+ * Handle `.fill N B` (the keyword has already been consumed): emit N
+ * copies of the single byte literal B.
+ *
+ * A count that cannot fit in the remaining output space is rejected before
+ * the loop starts. Without this, a huge N would loop for an impractically
+ * long time in pass 1, where no output buffer limit would otherwise stop it.
+ */
+static void do_fill(struct Scanner *s)
+{
+    long long N;
+    long long i;
+    unsigned char b;
+
+    skip_inline_ws(s);
+    N = read_decimal(s);
+    if (N < 0) die(".fill: N must be non-negative (got %lld)", N);
+    skip_inline_ws(s);
+    parse_one_byte_literal(s, &b, 0, NULL, 0, NULL);
+
+    /* Checked after parsing so syntax errors in B are still reported first. */
+    if (N > 0 && N > MAX_OUTPUT_BYTES - ip) {
+        die("output overflow");
+    }
+    for (i = 0; i < N; i++) emit_byte(b);
+}
+
+/*
+ * Handle `.scope`: enter a new local-label scope with a fresh id.
+ * Scopes may not nest; opening one inside another is fatal.
+ */
+static void do_scope_open(void)
+{
+    if (in_scope) {
+        die(".scope: nested scopes are not permitted");
+    }
+    current_scope = ++scope_seq;
+    in_scope = 1;
+}
+
+/*
+ * Handle `.endscope`: leave the current scope and return to the global
+ * scope id (0). Fatal when no scope is open.
+ */
+static void do_scope_close(void)
+{
+    if (!in_scope) {
+        die(".endscope: not in a scope");
+    }
+    current_scope = 0;
+    in_scope = 0;
+}
+
+/* --- main scanner loop ------------------------------------------------ */
+
+/*
+ * Scan one input file from start to end for the current pass.
+ *
+ * Sets cur_path / cur_line for diagnostics, then dispatches on the first
+ * byte of each token:
+ *   ':'          label definition (recorded in pass 1 only)
+ *   '.'          directive (.align, .fill, .scope, .endscope)
+ *   ! @ $ ~ % &  label reference
+ *   byte digit   free-form byte stream
+ * Any other byte is a fatal error.
+ */
+static void process_file(struct InFile *f)
+{
+    struct Scanner s;
+
+    s.buf = f->buf;
+    s.len = f->len;
+    s.pos = 0;
+    cur_path = f->path;
+    cur_line = 1;
+
+    for (;;) {
+        char tok[MAX_TOKEN];
+        int ch;
+        int n;
+        int is_local;
+
+        skip_ws_and_comments(&s);
+        if (s.pos >= s.len) {
+            return;
+        }
+        ch = (unsigned char)s.buf[s.pos];
+
+        switch (ch) {
+        case ':':
+            s.pos++;
+            n = read_name(&s, tok, sizeof(tok));
+            is_local = (n > 0 && tok[0] == '.');
+            /* The scope rule is enforced in both passes. */
+            if (is_local && !in_scope) {
+                die("dot-prefixed label '%.*s' outside a .scope", n, tok);
+            }
+            if (pass == 1) {
+                define_label(tok, n, is_local ? current_scope : 0);
+            }
+            break;
+
+        case '.':
+            s.pos++;
+            n = read_directive_name(&s, tok, sizeof(tok));
+            if (n == 5 && memcmp(tok, "align", 5) == 0) {
+                do_align(&s);
+            } else if (n == 4 && memcmp(tok, "fill", 4) == 0) {
+                do_fill(&s);
+            } else if (n == 5 && memcmp(tok, "scope", 5) == 0) {
+                do_scope_open();
+            } else if (n == 8 && memcmp(tok, "endscope", 8) == 0) {
+                do_scope_close();
+            } else {
+                die("unknown directive '.%.*s'", n, tok);
+            }
+            break;
+
+        case '!':
+        case '@':
+        case '$':
+        case '~':
+        case '%':
+        case '&':
+            s.pos++;
+            process_reference(&s, ch);
+            break;
+
+        default:
+            if (!is_byte_digit(ch)) {
+                die("unexpected character 0x%02x ('%c')", ch, isprint(ch) ? ch : '?');
+            }
+            parse_byte_stream(&s);
+            break;
+        }
+    }
+}
+
+/* --- argument parsing & top-level ------------------------------------- */
+
+/*
+ * Parse a numeric command-line argument.
+ *
+ * s is read as hexadecimal when it starts with "0x" / "0X" and as decimal
+ * otherwise. The whole string must be consumed and at least one digit must
+ * be present; an empty string, trailing junk or an out-of-range value
+ * prints "hex2pp: invalid WHAT: S" and exits with status 1. `what` names
+ * the argument in that message.
+ */
+static long long parse_long(const char *s, const char *what)
+{
+    char *end;
+    long long v;
+    int base = 10;
+    if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) base = 16;
+    errno = 0;
+    v = strtoll(s, &end, base);
+    /* end == s: strtoll consumed nothing (e.g. an empty argument). */
+    if (errno != 0 || end == s || *end != '\0') {
+        fprintf(stderr, "hex2pp: invalid %s: %s\n", what, s);
+        exit(1);
+    }
+    return v;
+}
+
+/*
+ * Read the whole file at `path` into a freshly malloc'ed, NUL-terminated
+ * buffer and append it to inputs[].
+ *
+ * The size is found by seeking to the end. Any failure (too many files,
+ * open/seek/tell/read error, file larger than MAX_INPUT_BYTES, allocation
+ * failure) prints a message and exits with status 1. `path` itself is
+ * stored by pointer, not copied.
+ */
+static void load_input(const char *path)
+{
+    struct InFile *slot;
+    FILE *in;
+    long size;
+    char *data;
+
+    if (input_count >= MAX_FILES) {
+        fprintf(stderr, "hex2pp: too many input files\n");
+        exit(1);
+    }
+
+    in = fopen(path, "rb");
+    if (in == NULL) {
+        perror(path);
+        exit(1);
+    }
+
+    /* Measure the file: seek to the end, read the offset, rewind. */
+    if (fseek(in, 0, SEEK_END) != 0 || (size = ftell(in)) < 0) {
+        perror(path);
+        exit(1);
+    }
+    if (size > MAX_INPUT_BYTES) {
+        fprintf(stderr, "%s: input too large\n", path);
+        exit(1);
+    }
+    rewind(in);
+
+    data = (char *)malloc((size_t)size + 1);
+    if (data == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(1);
+    }
+    if (size > 0) {
+        size_t got = fread(data, 1, (size_t)size, in);
+        if (got != (size_t)size) {
+            perror(path);
+            exit(1);
+        }
+    }
+    data[size] = '\0';
+    fclose(in);
+
+    slot = &inputs[input_count++];
+    slot->path = path;
+    slot->buf = data;
+    slot->len = (int)size;
+}
+
+/* Print the command-line synopsis to stderr; prog is the program name shown. */
+static void usage(const char *prog)
+{
+    static const char *const option_lines =
+        " [-o|--output OUT]\n"
+        " [-B|--base-address ADDR]\n"
+        " [--big-endian | --little-endian]\n"
+        " [-b|--binary]\n"
+        " [--non-executable]\n";
+
+    fprintf(stderr, "usage: %s (-f|--file) FILE [(-f|--file) FILE ...]\n", prog);
+    fputs(option_lines, stderr);
+}
+
+/*
+ * Run one complete scan (which = 1 or 2) over every input file in order.
+ * Resets the position counter, output length and scope state first, and
+ * dies if a `.scope` is still open once all files have been scanned.
+ */
+static void run_pass(int which)
+{
+    int i;
+
+    pass = which;
+    ip = 0;
+    output_used = 0;
+    current_scope = 0;
+    in_scope = 0;
+    scope_seq = 0;
+    for (i = 0; i < input_count; i++) {
+        process_file(&inputs[i]);
+    }
+    if (in_scope) die(".scope not closed at end of input");
+}
+
+/*
+ * Entry point: parse options, load the inputs, run pass 1 (collect labels)
+ * and pass 2 (emit), then write output_buf to output_path.
+ *
+ * Returns 0 on success and 1 on a usage or output error. A failure while
+ * closing the output file is reported as an error, because buffered data
+ * may only reach the file when the stream is closed. Unless
+ * --non-executable was given, a regular output file is chmod'ed to 0750;
+ * a chmod failure is deliberately ignored.
+ */
+int main(int argc, char **argv)
+{
+    int i;
+
+    for (i = 1; i < argc; i++) {
+        const char *a = argv[i];
+        if (strcmp(a, "-f") == 0 || strcmp(a, "--file") == 0) {
+            if (++i >= argc) { usage(argv[0]); return 1; }
+            load_input(argv[i]);
+        } else if (strcmp(a, "-o") == 0 || strcmp(a, "--output") == 0) {
+            if (++i >= argc) { usage(argv[0]); return 1; }
+            output_path = argv[i];
+        } else if (strcmp(a, "-B") == 0 || strcmp(a, "--base-address") == 0) {
+            if (++i >= argc) { usage(argv[0]); return 1; }
+            base_address = parse_long(argv[i], "base address");
+        } else if (strcmp(a, "--big-endian") == 0) {
+            big_endian = 1;
+        } else if (strcmp(a, "--little-endian") == 0) {
+            big_endian = 0;
+        } else if (strcmp(a, "-b") == 0 || strcmp(a, "--binary") == 0) {
+            byte_mode = BINARY_MODE;
+        } else if (strcmp(a, "--non-executable") == 0) {
+            non_executable = 1;
+        } else if (strcmp(a, "-h") == 0 || strcmp(a, "--help") == 0) {
+            usage(argv[0]);
+            return 0;
+        } else {
+            fprintf(stderr, "hex2pp: unknown argument: %s\n", a);
+            usage(argv[0]);
+            return 1;
+        }
+    }
+
+    if (input_count == 0) {
+        fprintf(stderr, "hex2pp: no input files\n");
+        usage(argv[0]);
+        return 1;
+    }
+
+    run_pass(1); /* collect labels */
+    run_pass(2); /* emit bytes and resolve references */
+
+    /* Write output. */
+    {
+        FILE *fp = fopen(output_path, "wb");
+        if (fp == NULL) { perror(output_path); return 1; }
+        if (output_used > 0 &&
+            fwrite(output_buf, 1, (size_t)output_used, fp) != (size_t)output_used) {
+            perror(output_path);
+            fclose(fp);
+            return 1;
+        }
+        /* fclose flushes; a failure here means the file is incomplete. */
+        if (fclose(fp) != 0) {
+            perror(output_path);
+            return 1;
+        }
+    }
+
+    if (!non_executable) {
+        struct stat st;
+        if (stat(output_path, &st) == 0 && S_ISREG(st.st_mode)) {
+            (void)chmod(output_path, 0750);
+        }
+    }
+
+    return 0;
+}