lint.sh (2815B)
#!/bin/sh
## lint.sh — catch undefined P1 tokens before they reach M1.
##
## M1 silently passes undefined tokens through as literal text, so a
## misspelled ld_r0,r4,0 (when only ld_r0,r4,8 is defined) produces
## a runnable-but-SIGILL-ing binary with no diagnostic. This script
## extracts every op-shaped token referenced in a .M1 program and
## asserts each has a matching `DEFINE` — either in the per-arch defs
## file or in one of the program files themselves (M1 expands locally
## DEFINEd macros just like per-arch ones, so those are not undefined).
##
## After the 2026 rename, op tokens are lowercase identifiers that may
## contain ',' for multi-operand forms (li_r0, mov_r4,r0, add_r1,r1,r2,
## prologue_n3, sys_write). The tokenizer must skip `"…"` / `'…'`
## quoted literals (can span lines and carry prose like "usage: lisp")
## and `#`/`;` line comments, so the pass is written in Python.
##
## Usage: lint.sh <p1_arch.M1> <prog.M1> [<prog.M1> ...]
## Exit: 0 on success; 1 + diagnostic on any missing token; 2 on misuse.

set -eu

if [ "$#" -lt 2 ]; then
    echo "usage: $0 <p1_arch.M1> <prog.M1> [<prog.M1> ...]" >&2
    exit 2
fi

exec python3 - "$@" <<'PYEOF'
import re
import sys

defs_path = sys.argv[1]
prog_paths = sys.argv[2:]


def defined_names(path):
    """Return the set of names bound by `DEFINE <name> ...` lines in *path*.

    Matching is line-based (first whitespace-separated field must be the
    literal `DEFINE`), mirroring how M1 itself scans for definitions.
    """
    names = set()
    with open(path) as f:
        for line in f:
            parts = line.split(None, 2)
            if len(parts) >= 2 and parts[0] == 'DEFINE':
                names.add(parts[1])
    return names


def tokenize_source(text):
    """Emit op-shaped tokens from M0/M1 source text.

    Skips `"…"` / `'…'` quoted literals and `#`/`;` line comments.
    Quotes may span newlines (strings are line-unaware in M0); comments
    end at LF.

    Lines whose first field is `DEFINE` are skipped wholesale: the
    defined name is collected by defined_names(), and the replacement
    body (often lowercase hex such as `c3`) is macro *definition* text,
    not a token use — tokenizing it produced false positives.
    """
    i, n = 0, len(text)
    tokens = []
    cur = []
    line_start = True  # nothing but whitespace seen on the current line

    def flush():
        if cur:
            tokens.append(''.join(cur))
            cur.clear()

    while i < n:
        c = text[i]
        if (line_start and text.startswith('DEFINE', i)
                and (i + 6 == n or text[i + 6] in ' \t\n')):
            # Drop the whole DEFINE line, line-based like defined_names().
            while i < n and text[i] != '\n':
                i += 1
        elif c == '"' or c == "'":
            flush()
            line_start = False
            q = c
            i += 1
            while i < n and text[i] != q:
                i += 1
            i += 1  # consume closing quote (or run off the end: unterminated)
        elif c == '#' or c == ';':
            flush()
            line_start = False
            while i < n and text[i] != '\n':
                i += 1
        elif c.isspace():
            flush()
            if c == '\n':
                line_start = True
            i += 1
        else:
            cur.append(c)
            line_start = False
            i += 1
    flush()
    return tokens


# Op tokens: lowercase identifier, may carry ',' for multi-operand forms.
TOKEN_RE = re.compile(r'^[a-z][a-z0-9_,]*$')

defs = defined_names(defs_path)
used = {}  # token -> set of program files that reference it
for p in prog_paths:
    # A program may DEFINE its own macros; M1 expands those exactly like
    # the per-arch ones, so they must not be reported as undefined.
    defs |= defined_names(p)
    with open(p) as f:
        text = f.read()
    for tok in tokenize_source(text):
        if TOKEN_RE.match(tok):
            used.setdefault(tok, set()).add(p)

missing = sorted(t for t in used if t not in defs)
if missing:
    sys.stderr.write(
        'error: P1 lint: undefined token(s) referenced in M1 source\n'
        f'  (defs file: {defs_path})\n'
    )
    for m in missing:
        # Name the referencing file(s) so the offender is easy to locate.
        sys.stderr.write(f'  {m}  ({", ".join(sorted(used[m]))})\n')
    sys.exit(1)
PYEOF