lint.sh (2884B)
#!/bin/sh
## lint.sh — flag P1 tokens that have no DEFINE before M1 ever sees them.
##
## M1 passes any token it does not recognize straight through as literal
## text, so a typo such as ld_r0,r4,0 (when the defs file only provides
## ld_r0,r4,8) yields a binary that assembles cleanly and then SIGILLs
## at run time with no diagnostic at all. To close that gap, this script
## pulls every op-shaped token out of the given .M1 programs and checks
## that each one has a matching `DEFINE` in the per-arch defs file.
##
## Since the 2026 rename, op tokens are lowercase identifiers that may
## contain ',' for multi-operand forms (li_r0, mov_r4,r0, add_r1,r1,r2,
## prologue_n3, sys_write). Tokenizing has to skip `"…"` / `'…'` quoted
## literals (which may span lines and carry prose like "usage: lisp")
## and `#`/`;` line comments, hence the Python pass below rather than
## a grep one-liner.
##
## Env: ARCH=aarch64|amd64|riscv64
## Usage: lint.sh <prog.M1> [<prog.M1> ...]
## Exit: 0 on success; 1 + diagnostic on any missing token; 2 on misuse.

set -eu

## The defs file path depends on the architecture; refuse to run without it.
: "${ARCH:?ARCH must be set}"

## Misuse if no program file was named.
test "$#" -ge 1 || {
    echo "usage: ARCH=<arch> $0 <prog.M1> [<prog.M1> ...]" >&2
    exit 2
}

## Hand off to Python: argv[1] is the defs file, the rest are programs.
exec python3 - "P1/P1-$ARCH.M1" "$@" <<'PY'
import re
import sys

defs_path = sys.argv[1]
prog_paths = sys.argv[2:]


def load_defines(path):
    """Return the set of names bound by `DEFINE <name> ...` lines."""
    with open(path) as handle:
        return {
            fields[1]
            for fields in (line.split(None, 2) for line in handle)
            if len(fields) >= 2 and fields[0] == 'DEFINE'
        }


def scan_tokens(text):
    """Split M0/M1 source into whitespace-separated tokens.

    `"…"` / `'…'` quoted literals are dropped entirely (a quote may
    cross newlines — strings are line-unaware in M0), and `#` / `;`
    comments are dropped up to the end of their line.
    """
    out = []
    buf = []
    pos, end = 0, len(text)

    def emit():
        # Close off the token accumulated so far, if any.
        if buf:
            out.append(''.join(buf))
            del buf[:]

    while pos < end:
        ch = text[pos]
        if ch == '"' or ch == "'":
            emit()
            close = text.find(ch, pos + 1)
            # Unterminated quote swallows the rest of the input.
            pos = end if close < 0 else close + 1
        elif ch == '#' or ch == ';':
            emit()
            nl = text.find('\n', pos)
            pos = end if nl < 0 else nl
        elif ch.isspace():
            emit()
            pos += 1
        else:
            buf.append(ch)
            pos += 1
    emit()
    return out


# Op-shaped: lowercase start, then lowercase letters, digits, '_' or ','.
OP_SHAPE = re.compile(r'^[a-z][a-z0-9_,]*$')

known = load_defines(defs_path)
referenced = set()
for prog in prog_paths:
    with open(prog) as src:
        referenced.update(
            tok for tok in scan_tokens(src.read()) if OP_SHAPE.match(tok)
        )

unknown = sorted(referenced - known)
if unknown:
    sys.stderr.write(
        f'error: P1 lint: undefined token(s) referenced in M1 source\n'
        f' (defs file: {defs_path})\n'
    )
    for name in unknown:
        sys.stderr.write(f' {name}\n')
    sys.exit(1)
PY