commit f80f807bc643e0821ae67619e3d8806eedf671cc parent e2487bfe317a6eb027d4c5f7f2d5e223adf6f447 Author: Ryan Sepassi <rsepassi@gmail.com> Date: Fri, 1 May 2026 17:09:59 -0700 cc: fix nine bugs across lex/pp/cg/parse/types Bugs found and fixed (red->green test per bug): Lexer/PP: - 250 # stringize used internal symbol names for punctuators (`+` -> "plus"); now uses the source spelling via a reverse map over %punct-alist. - 250 # stringize inserted unconditional spaces between every pair of tokens; now uses a loc-based abutment check (C11 6.10.3.2 p2). - 251 # stringize lost embedded escapes from cooked STR/CHAR; restore \n / \t / \r in %pp-quote-bytes. - 252 __LINE__ / __FILE__ in a macro body reported the #define line; thread call-loc through %pp-prepare-body and stamp those builtins with the invocation loc (C11 6.10.8.1). Codegen: - 270 narrow-type arithmetic overflow leaked high bits; cg-binop now sign- or zero-extends the result to result-ty before the spill (skipping compare ops, which already yield 0/1). - 271 mixed-sign comparisons read stale high bits because cg-arith-conv only relabels; %cg-load-opnd-into now canonicalizes frame rvals on load per their current type. Parser: - 290 constant-expression ternary `?:` eagerly evaluated both arms (so `1 ? 7 : 1/0` aborted); skip-scan the dead arm, mirroring the && / || short-circuit (C11 6.6 p3, 6.5.15/4). - 291 `continue` inside `do { ... } while (c);` re-ran the body instead of testing the cond; hand-roll the loop layout so ::tag_top labels the cond test (C11 6.8.6.2 p2). Type system: - 310 inner-scope `struct TAG { ... }` mutated the outer-scope same-tag ctype via tag-lookup walking all frames; restrict reuse to a still-incomplete tag in the top tag frame and reject same-scope redefinition. Also: tests/cc-cg/37-struct-store now casts u8 loads to i32 so cg-binop's new narrow-result canon doesn't truncate the checksum (mirrors the parser's cg-arith-conv discipline). Suites: cc 166/166, cc-cg 51/51, cc-lex 16/16, cc-pp 34/34 on aarch64. Diffstat:
18 files changed, 419 insertions(+), 42 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm @@ -1902,7 +1902,8 @@ ((not m) (%pp-relocate t st)) ((eq? (macro-kind m) 'obj) (let ((body (%pp-prepare-body (macro-body m) - (cons name (tok-hide t))))) + (cons name (tok-hide t)) + (tok-loc t)))) (%pp-unshift-upstream! st body) (%pp-dispatch-step st))) (else @@ -1922,7 +1923,8 @@ (env (%pp-bind-args params args variadic? (tok-loc t))) (sub (%pp-substitute (macro-body m) env (tok-loc t) st)) (body (%pp-prepare-body sub - (cons name (tok-hide t))))) + (cons name (tok-hide t)) + (tok-loc t)))) (%pp-unshift-upstream! st body) (%pp-dispatch-step st)))))))))))) (else (%pp-relocate t st))))) @@ -2155,15 +2157,41 @@ (msg (%pp-toks->display msg-toks))) (die loc "#error" msg))) +;; Per C11 §6.10.3.2 ¶2: whitespace between argument tokens becomes a +;; single space; absence of whitespace must NOT introduce one. We +;; approximate "had whitespace" by comparing locations: a space goes +;; in iff the next token does not abut the previous one (different +;; line, or column gap larger than the prev spelling length). (define (%pp-toks->display toks) - (let loop ((toks toks) (acc '()) (first? #t)) + (let loop ((toks toks) (prev #f) (prev-bv #f) (acc '())) (cond ((null? toks) (bv-cat (reverse acc))) (else - (let ((p (%pp-tok->bv (car toks)))) - (loop (cdr toks) - (if first? (cons p acc) (cons p (cons " " acc))) - #f)))))) + (let* ((t (car toks)) (p (%pp-tok->bv t)) + (sep? (cond + ((not prev) #f) + ((or (not (tok-loc prev)) (not (tok-loc t))) #t) + ((not (= (loc-line (tok-loc prev)) + (loc-line (tok-loc t)))) #t) + (else + (not (= (loc-col (tok-loc t)) + (+ (loc-col (tok-loc prev)) + (bytevector-length prev-bv)))))))) + (loop (cdr toks) t p + (if sep? (cons p (cons " " acc)) (cons p acc)))))))) + +;; Reverse-map punctuator symbol -> source spelling. %punct-alist may +;; map several spellings to the same symbol (e.g. 
both "[" and "<:" +;; resolve to 'lbrack); the 1-byte canonical forms appear last in the +;; source list, so a last-wins fold yields "[" rather than the digraph. +(define %pp-punct-spell + (let loop ((al %punct-alist) (acc '())) + (cond ((null? al) acc) + (else (loop (cdr al) + (alist-set (cdr (car al)) (car (car al)) acc)))))) + +(define (%pp-punct-spelling sym) + (or (alist-ref/eq sym %pp-punct-spell) (symbol->string sym))) (define (%pp-tok->bv t) (let ((k (tok-kind t)) (v (tok-value t))) @@ -2173,12 +2201,14 @@ ((eq? k 'STR) (%pp-quote-bytes v 34)) ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39)) ((eq? k 'KW) (symbol->string v)) - ((eq? k 'PUNCT) (symbol->string v)) + ((eq? k 'PUNCT) (%pp-punct-spelling v)) (else "?")))) ;; Reconstruct a string/char literal source spelling from cooked content. -;; Per C11 6.10.3.2: insert `\` before each `"` and `\` (or `'` for char). -;; `delim` is 34 for STR, 39 for CHAR. +;; Per C11 6.10.3.2: stringize must reproduce the source spelling of +;; STR/CHAR constants — every `"` and `\` is prefixed with `\`, and +;; the common control-character escapes are restored from their cooked +;; bytes. `delim` is 34 for STR, 39 for CHAR. (define (%pp-quote-bytes bv delim) (let* ((n (bytevector-length bv)) (delim-bv (bv-of-byte delim))) @@ -2190,6 +2220,9 @@ (cond ((or (= b delim) (= b 92)) (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc)))) + ((= b 10) (loop (+ i 1) (cons "\\n" acc))) + ((= b 9) (loop (+ i 1) (cons "\\t" acc))) + ((= b 13) (loop (+ i 1) (cons "\\r" acc))) (else (loop (+ i 1) (cons (bv-of-byte b) acc)))))))))) @@ -2257,7 +2290,8 @@ (cond ((eq? kind 'obj) (let ((bodies (%pp-prepare-body (macro-body m) - (cons name (tok-hide t))))) + (cons name (tok-hide t)) + (tok-loc t)))) (%pp-emit-expanded bodies state out) (cont rest))) (else @@ -2274,14 +2308,29 @@ (env (%pp-bind-args params args variadic? 
(tok-loc t))) (sub (%pp-substitute (macro-body m) env (tok-loc t) state)) (bodies (%pp-prepare-body sub - (cons name (tok-hide t))))) + (cons name (tok-hide t)) + (tok-loc t)))) (%pp-emit-expanded bodies state out) (cont rest2))))))))) -(define (%pp-prepare-body body extra-hide) - (map (lambda (t) - (%pp-with-hide t (%pp-bv-union extra-hide (tok-hide t)))) - body)) +;; Stamp built-in marker tokens (__LINE__ / __FILE__) inside the body +;; with the macro-invocation location, so they report the call site +;; per C11 §6.10.8. Other body tokens keep their #define-time loc so +;; diagnostics still point at the macro body. Hide-set is updated +;; with the macro name on every token. +(define (%pp-prepare-body body extra-hide . call-loc-opt) + (let ((call-loc (cond ((null? call-loc-opt) #f) + (else (car call-loc-opt))))) + (map (lambda (t) + (let ((hidden (%pp-with-hide t (%pp-bv-union extra-hide + (tok-hide t))))) + (cond + ((and call-loc (%pp-ident? hidden) + (or (bv= (tok-value hidden) %pp-bv-LINE) + (bv= (tok-value hidden) %pp-bv-FILE))) + (%pp-with-loc hidden call-loc)) + (else hidden)))) + body))) ;; Collect comma-separated args. `toks` starts AFTER `(`. Returns ;; (args . rest), where args is a list of token-lists. @@ -2715,6 +2764,31 @@ (($ opnd? (kind imm) (ext ,n)) (%cg-emit-li cg reg n)) (($ opnd? (kind frame) (lval? #t) (type ,ty) (ext ,off)) (%cg-emit-ld-slot-typed cg reg ty off)) + (($ opnd? (kind frame) (lval? #f) (type ,ty) (ext ,off)) + ;; Frame rval: spilled as 8 bytes, but the slot's bit-pattern may + ;; not be canonical for the opnd's CURRENT type (e.g. + ;; cg-arith-conv relabeled a signed slot as unsigned). Canonicalize + ;; on load so downstream 64-bit ALU/compare ops see the C-semantic + ;; value. + (%cg-emit-ld-slot cg reg off) + (let ((k (ctype-kind ty))) + (cond + ((eq? k 'i8) (%cg-emit-sext cg reg 56)) + ((eq? k 'i16) (%cg-emit-sext cg reg 48)) + ((eq? k 'i32) (%cg-emit-sext cg reg 32)) + ((eq? 
k 'u8) + (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ")\n"))) + ((eq? k 'u16) + (%cg-emit-many cg (list "%zext16(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ")\n"))) + ((eq? k 'u32) + (%cg-emit-many cg (list "%zext32(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ", t1)\n"))) + ((eq? k 'bool) + (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ")\n"))) + (else 0)))) (($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off)) (($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl)) (($ opnd? (kind global) (type ,ty) (ext ,lbl)) @@ -3492,6 +3566,28 @@ ((eq? op 'le) (%cg-emit-cmp cg (if unsigned? "leu" "le") 'a0 'a1 't0)) ((eq? op 'ge) (%cg-emit-cmp cg (if unsigned? "geu" "ge") 'a0 'a1 't0)) (else (die #f "cg-binop: unknown op" op))) + ;; Canonicalize narrow integer results to their type's bit width + ;; before spilling, so the slot's bit-pattern matches result-ty. + ;; Compare ops already yield 0/1; skip them. Pointer-arith branches + ;; above don't reach here. + (cond + ((or (eq? op 'eq) (eq? op 'ne) + (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) 0) + (else + (let ((k (ctype-kind result-ty))) + (cond + ((eq? k 'i8) (%cg-emit-sext cg 't0 56)) + ((eq? k 'i16) (%cg-emit-sext cg 't0 48)) + ((eq? k 'i32) (%cg-emit-sext cg 't0 32)) + ((eq? k 'u8) + (%cg-emit-many cg (list "%zext8(t0, t0)\n"))) + ((eq? k 'u16) + (%cg-emit-many cg (list "%zext16(t0, t0)\n"))) + ((eq? k 'u32) + (%cg-emit-many cg (list "%zext32(t0, t0, t1)\n"))) + ((eq? k 'bool) + (%cg-emit-many cg (list "%zext8(t0, t0)\n"))) + (else 0))))) (%cg-spill-reg cg 't0 result-ty))))) ;; Post-increment / post-decrement on the top-of-vstack lval. @@ -4300,8 +4396,18 @@ (cond ((at-punct? ps 'lbrace) (advance ps) - (let* ((ex (and tag (tag-lookup ps tag))) - (ct (cond ((and ex (eq? (ctype-kind ex) kind)) ex) + ;; A `struct/union TAG { ... }` declaration introduces (or + ;; completes) the tag in the *current* scope. 
Looking up in + ;; outer scopes via tag-lookup would let an inner-scope + ;; definition mutate an outer-scope same-tag ctype via + ;; complete-agg!. Restrict the reuse to the top frame, and + ;; only when the existing tag is still incomplete (size < 0); + ;; otherwise this is an attempted redefinition. + (let* ((ex (and tag (alist-ref tag (car (ps-tags ps))))) + (ct (cond ((and ex (eq? (ctype-kind ex) kind) + (< (ctype-size ex) 0)) ex) + ((and ex (eq? (ctype-kind ex) kind)) + (die (tok-loc (peek ps)) "agg redefinition" tag)) (else (let ((c (%ctype kind -1 -1 (list (or tag #f) #f '())))) (if tag (tag-bind! ps tag c)) c)))) @@ -4488,18 +4594,85 @@ (define (parse-const-expr ps) (parse-const-cond ps)) -;; Ternary (right-associative). +;; Ternary (right-associative). Per C11 §6.6 ¶3 + §6.5.15/4 only the +;; chosen branch is evaluated; the other need not be a valid constant +;; expression (e.g. `1 ? 2 : 1/0` must yield 2, not abort). The dead +;; arm is skipped via %const-skip-cond-{mid,rhs}, like the &&/|| +;; short-circuit paths above. (define (parse-const-cond ps) (let ((c (parse-const-lor ps))) (cond ((at-punct? ps 'qmark) (advance ps) - (let* ((t (parse-const-expr ps)) - (_ (expect-punct ps 'colon)) - (e (parse-const-cond ps))) - (cond ((%const-bool? c) t) (else e)))) + (cond + ((%const-bool? c) + (let* ((t (parse-const-expr ps)) + (_ (expect-punct ps 'colon))) + (%const-skip-cond-rhs ps) + t)) + (else + (%const-skip-cond-mid ps) + (expect-punct ps 'colon) + (parse-const-cond ps)))) (else c)))) +;; Skip the middle of a ternary whose condition was false. Stop on the +;; matching `:` at depth 0; nested `?:` pairs are absorbed by tracking +;; an unmatched-? counter. +(define (%const-skip-cond-mid ps) + (let lp ((d 0) (q 0)) + (let ((t (peek ps))) + (cond + ((eq? (tok-kind t) 'EOF) #t) + ((not (eq? (tok-kind t) 'PUNCT)) + (advance ps) (lp d q)) + (else + (let ((v (tok-value t))) + (cond + ((or (eq? v 'lparen) (eq? v 'lbrack)) + (advance ps) (lp (+ d 1) q)) + ((or (eq? 
v 'rparen) (eq? v 'rbrack)) + (cond ((zero? d) #t) + (else (advance ps) (lp (- d 1) q)))) + ((and (zero? d) (eq? v 'qmark)) + (advance ps) (lp d (+ q 1))) + ((and (zero? d) (eq? v 'colon) (zero? q)) #t) + ((and (zero? d) (eq? v 'colon)) + (advance ps) (lp d (- q 1))) + ((and (zero? d) (zero? q) + (or (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace))) + #t) + (else (advance ps) (lp d q))))))))) + +;; Skip the third arm of a ternary whose condition was true. Third arm +;; is a conditional-expression so it may itself contain nested `?:` +;; pairs that we absorb. Stop on comma/semi/rbrace at depth 0 with no +;; open `?`, or on a `:` that closes an outer ternary. +(define (%const-skip-cond-rhs ps) + (let lp ((d 0) (q 0)) + (let ((t (peek ps))) + (cond + ((eq? (tok-kind t) 'EOF) #t) + ((not (eq? (tok-kind t) 'PUNCT)) + (advance ps) (lp d q)) + (else + (let ((v (tok-value t))) + (cond + ((or (eq? v 'lparen) (eq? v 'lbrack)) + (advance ps) (lp (+ d 1) q)) + ((or (eq? v 'rparen) (eq? v 'rbrack)) + (cond ((zero? d) #t) + (else (advance ps) (lp (- d 1) q)))) + ((and (zero? d) (eq? v 'qmark)) + (advance ps) (lp d (+ q 1))) + ((and (zero? d) (eq? v 'colon) (> q 0)) + (advance ps) (lp d (- q 1))) + ((and (zero? d) (zero? q) + (or (eq? v 'colon) (eq? v 'comma) + (eq? v 'semi) (eq? v 'rbrace))) + #t) + (else (advance ps) (lp d q))))))))) + ;; Generic left-associative binary level. ;; ops: alist of punct-sym → (vp vp → vp). (define (%const-binl ps next ops) @@ -5908,21 +6081,36 @@ (define (parse-do-stmt ps) (expect-kw ps 'do) - ;; do-while needs its tag known *before* the body parses, so we - ;; capture it inside the body-thunk and stash it for pop-loop-ctx - ;; via a side cell. - (cg-loop (ps-cg ps) - (lambda () #t) - (lambda (tag) - (push-loop-ctx! ps 'do tag #t) - (parse-stmt ps) - (pop-loop-ctx! ps) - (expect-kw ps 'while) (expect-punct ps 'lparen) - (parse-expr ps) (rval! 
ps) - (expect-punct ps 'rparen) (expect-punct ps 'semi) - (cg-unop (ps-cg ps) 'lnot) - (cg-if (ps-cg ps) - (lambda () (cg-break (ps-cg ps) tag))))) + ;; `continue` in a do-while must jump to the *cond test* (C11 + ;; §6.8.6.2 ¶2), not to the top of the body. cg-continue jumps to + ;; ::tag_top, so we lay the loop out so that ::tag_top labels the + ;; cond test and the body lives between an entry-skip and a back + ;; edge. The macro %loop_tag isn't shaped right for this — emit + ;; raw P1pp here, mirroring parse-for-stmt's hand-rolled layout. + ;; + ;; Layout: + ;; ::tag_body + ;; <body> + ;; ::tag_top ; %continue(tag) jumps here + ;; <cond> + ;; %if_eqz(c, %break(tag)) + ;; %b(&::tag_body) + ;; ::tag_end + (let* ((cg (ps-cg ps)) + (tag (%cg-fresh-loop-tag cg))) + (%cg-emit-many cg (list "::" tag "_body\n")) + (push-loop-ctx! ps 'do tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps) + (expect-kw ps 'while) (expect-punct ps 'lparen) + (%cg-emit-many cg (list "::" tag "_top\n")) + (parse-expr ps) (rval! ps) + (expect-punct ps 'rparen) (expect-punct ps 'semi) + (let ((c (cg-pop cg))) + (%cg-load-opnd-into cg c 't0) + (%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n"))) + (%cg-emit-many cg (list "%b(&::" tag "_body)\n" + "::" tag "_end\n"))) #t) (define (parse-for-stmt ps) diff --git a/tests/cc-cg/37-struct-store.scm b/tests/cc-cg/37-struct-store.scm @@ -33,19 +33,21 @@ (cg-push-field cg "c") (cg-push-imm cg %t-u8 7) (cg-assign cg) (cg-pop cg) - ;; return (b.a + b.b*10 + b.c*100) == 753 + ;; return (b.a + b.b*10 + b.c*100) == 753. + ;; Cast each u8 load to i32 so cg-binop's narrow-result canon + ;; doesn't truncate the products (700 mod 256 = 188). 
(cg-push-sym cg sym-b) (cg-push-field cg "a") - (cg-load cg) + (cg-load cg) (cg-cast cg %t-i32) (cg-push-sym cg sym-b) (cg-push-field cg "b") - (cg-load cg) + (cg-load cg) (cg-cast cg %t-i32) (cg-push-imm cg %t-i32 10) (cg-binop cg 'mul) (cg-binop cg 'add) (cg-push-sym cg sym-b) (cg-push-field cg "c") - (cg-load cg) + (cg-load cg) (cg-cast cg %t-i32) (cg-push-imm cg %t-i32 100) (cg-binop cg 'mul) (cg-binop cg 'add) diff --git a/tests/cc/250-stringize-punct.c b/tests/cc/250-stringize-punct.c @@ -0,0 +1,28 @@ +/* Stringize (`#x`) must reproduce each token's source spelling + * (C11 §6.10.3.2). Two related bugs in %pp-toks->display / + * %pp-tok->bv: + * 1. PUNCT tokens were stringified by symbol->string, so `+` + * came out as "plus" instead of "+". + * 2. A space was inserted between every two tokens regardless + * of source whitespace, so `S(1+2)` produced "1 plus 2" + * where C11 requires "1+2" (no whitespace = no space). + * Spaces should only appear when the token sources don't abut. + */ + +#define S(x) #x + +static int slen(const char *s) { int n = 0; while (s[n] != 0) n++; return n; } +static int smatch(const char *a, const char *b) { + int i = 0; + while (a[i] != 0 && b[i] != 0) { if (a[i] != b[i]) return 0; i++; } + return a[i] == b[i]; +} + +int main(void) { + if (!smatch(S(1+2), "1+2")) return 1; + if (!smatch(S(a*b), "a*b")) return 2; + if (!smatch(S(x->y), "x->y")) return 3; + /* Real whitespace becomes a single space. */ + if (!smatch(S(a + b), "a + b")) return 4; + return 0; +} diff --git a/tests/cc/250-stringize-punct.expected-exit b/tests/cc/250-stringize-punct.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/251-stringize-str-escape.c b/tests/cc/251-stringize-str-escape.c @@ -0,0 +1,29 @@ +/* Stringize of a string-literal argument must reproduce the source + * spelling, with `"` and `\` prefixed by `\` (C11 §6.10.3.2 ¶2). + * The lexer cooks string content, so `"a\nb"` becomes 3 bytes + * a,0x0A,b. 
Stringizing that requires re-escaping the embedded LF + * back to `\n`. The buggy `%pp-quote-bytes` only escaped `"` and + * `\` and emitted control bytes raw, producing a string with a + * literal newline byte (length 5) where C11 requires the 6-char + * spelling \"a\\nb\". + */ + +#define S(x) #x + +static int slen(const char *s) { int n = 0; while (s[n] != 0) n++; return n; } + +int main(void) { + const char *s = S("a\nb"); + /* Expected: "a\nb" -> 6 source-spelling chars. The cooked bytes + * of that 6-char literal are: " a \ n b " (still 6 distinct + * bytes) because the produced source-spelling itself escapes + * the LF as \\ + n. */ + if (slen(s) != 6) return 1; + if (s[0] != '"') return 2; + if (s[1] != 'a') return 3; + if (s[2] != '\\') return 4; + if (s[3] != 'n') return 5; + if (s[4] != 'b') return 6; + if (s[5] != '"') return 7; + return 0; +} diff --git a/tests/cc/251-stringize-str-escape.expected-exit b/tests/cc/251-stringize-str-escape.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/252-line-in-macro.c b/tests/cc/252-line-in-macro.c @@ -0,0 +1,18 @@ +/* `__LINE__` inside a macro body must report the line of the macro + * INVOCATION, not the line of the `#define` (C11 §6.10.8.1). The + * implementation kept body tokens' original locations across + * substitution; %pp-expand-builtin then read the body line, so + * HERE expanded with the body's line (the `#define` line) instead + * of the call site. 
+ */ + +#define HERE __LINE__ + +int main(void) { + int a = HERE; /* this is line 12 */ + int b = HERE; /* this is line 13 */ + if (a != 12) return 1; + if (b != 13) return 2; + if (b - a != 1) return 3; + return 0; +} diff --git a/tests/cc/252-line-in-macro.expected-exit b/tests/cc/252-line-in-macro.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/270-unsigned-overflow-cmp.c b/tests/cc/270-unsigned-overflow-cmp.c @@ -0,0 +1,15 @@ +/* tests/cc/270-unsigned-overflow-cmp.c — arithmetic on unsigned narrow + * types must wrap modulo 2^width before being used in comparisons. + * cg-binop emits 64-bit ALU ops and spills the unmasked result to a + * frame rval slot; reload of a frame rval uses %cg-emit-ld-slot (8-byte + * untyped load), so the overflowed high bits leak past width and corrupt + * comparisons against the wrapped value. Returns 0 when correct. */ +int main(void) { + unsigned int a = 0xFFFFFFFFu; + unsigned int b = 1u; + /* (a + b) wraps to 0u; (0u != 0) must be false. With the bug the + * sum's bit 32 survives the spill/reload and the compare returns 1. */ + if ((a + b) != 0u) return 1; + if ((a + b) >= 1u) return 2; + return 0; +} diff --git a/tests/cc/270-unsigned-overflow-cmp.expected-exit b/tests/cc/270-unsigned-overflow-cmp.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/271-mixed-sign-cmp.c b/tests/cc/271-mixed-sign-cmp.c @@ -0,0 +1,20 @@ +/* tests/cc/271-mixed-sign-cmp.c — comparing a signed value to an + * unsigned value of the same rank. C's usual-arithmetic-conversions + * relabel both as the unsigned type; with a sign-extended canonical + * slot for the signed operand the high bits leak into the 64-bit + * compare and corrupt the result. Returns 0 when correct. */ +int main(void) { + int i = -1; /* slot = 0xFFFFFFFFFFFFFFFF */ + unsigned int u = 0xFFFFFFFFu; /* slot = 0x00000000FFFFFFFF */ + /* Per C: i is converted to (unsigned)i = 0xFFFFFFFFu; the equality + * must hold. 
With the bug, eq compares the raw 64-bit slots and + * the test fails. */ + if (i != u) return 1; + if (!(i == u)) return 2; + /* Same idea for relational: (unsigned)-1 < 1 must be false. The + * declared unsigned variable forces the codegen path even though + * the parser may not preserve the literal U suffix. */ + unsigned int one = 1; + if (i < one) return 3; + return 0; +} diff --git a/tests/cc/271-mixed-sign-cmp.expected-exit b/tests/cc/271-mixed-sign-cmp.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/290-parse-const-ternary-shortcircuit.c b/tests/cc/290-parse-const-ternary-shortcircuit.c @@ -0,0 +1,15 @@ +/* Constant-expression ternary `?:` must evaluate only the chosen + * branch (C11 §6.6 ¶3 + §6.5.15/4): the unevaluated subexpression + * need not be a valid constant expression. parse-const-cond was + * eagerly evaluating both arms, so `1 ? 2 : 1/0` aborted with + * "const-expr: divide by zero" instead of returning 2. + */ + +enum { TRUE_ARM = 1 ? 7 : 1/0 }; +enum { FALSE_ARM = 0 ? 1/0 : 11 }; + +int main(int argc, char **argv) { + if (TRUE_ARM != 7) return 1; + if (FALSE_ARM != 11) return 2; + return 0; +} diff --git a/tests/cc/290-parse-const-ternary-shortcircuit.expected-exit b/tests/cc/290-parse-const-ternary-shortcircuit.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/291-do-while-continue.c b/tests/cc/291-do-while-continue.c @@ -0,0 +1,25 @@ +/* `continue` in a do-while loop must transfer control to the *cond* + * test (C11 §6.8.6.2), not back to the top of the body. parse-do-stmt + * placed both the body and the cond test inside the cg-loop body + * thunk — but cg-loop's underlying %loop_tag macro labels the top of + * its body as `tag_top`, and `%continue(tag)` jumps there. The result + * was that a `continue` re-entered the body from the start, executing + * one extra iteration of the body before the cond was finally tested. 
+ * + * Detect: with the buggy semantics, `continue` followed by a + * normally-terminating cond test bypasses the test and runs the body + * once more. Below, the `continue` fires when n==3 — at that point + * the cond `n < 3` is already false, so a correct compiler exits the + * loop and leaves n=3. The buggy compiler instead restarts the body + * (n becomes 4) before the cond finally fires. + */ + +int main(int argc, char **argv) { + int n = 0; + do { + n = n + 1; + if (n == 3) continue; + } while (n < 3); + if (n != 3) return 1; + return 0; +} diff --git a/tests/cc/291-do-while-continue.expected-exit b/tests/cc/291-do-while-continue.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/310-tag-shadow-inner-scope.c b/tests/cc/310-tag-shadow-inner-scope.c @@ -0,0 +1,29 @@ +/* C 6.7.2.3p2 + 6.2.1: a struct/union tag introduced inside a nested + * block scope must not redefine the same-named tag in an outer scope. + * cc.scm's parse-aggregate-spec calls tag-lookup (which walks all + * frames) and then complete-agg! mutates the result in place — so an + * inner `struct S { ... };` clobbers the outer tag's fields/size. + */ + +struct S { int x; }; + +int before_size = sizeof(struct S); /* 4 */ + +int dummy(void) { + /* Shadow with a strictly larger struct so the bug, if present, + * would mutate the outer ctype's size to 24. */ + struct S { long y; long z; long w; }; + struct S s; + s.y = 1; s.z = 2; s.w = 3; + return (int)(s.y + s.z + s.w); +} + +int after_size = sizeof(struct S); /* must still be 4 */ + +int main(int argc, char **argv) { + if (before_size != 4) return 1; + if (dummy() != 6) return 2; + if (after_size != 4) return 3; + if (sizeof(struct S) != 4) return 4; + return 0; +} diff --git a/tests/cc/310-tag-shadow-inner-scope.expected-exit b/tests/cc/310-tag-shadow-inner-scope.expected-exit @@ -0,0 +1 @@ +0