diff --git a/src/codegen/expressions/operators/binary.ts b/src/codegen/expressions/operators/binary.ts index d82ce727..a667036c 100644 --- a/src/codegen/expressions/operators/binary.ts +++ b/src/codegen/expressions/operators/binary.ts @@ -704,23 +704,33 @@ export class BinaryExpressionGenerator { const isEq = op === "===" || op === "=="; const cmpPred = isEq ? "eq" : "ne"; const validCmp = this.ctx.emitIcmp(cmpPred, "i8", charByte, `${charCode}`); - const validI32 = this.ctx.nextTemp(); - this.ctx.emit(`${validI32} = zext i1 ${validCmp} to i32`); this.ctx.emitBr(endLabel); this.ctx.emitLabel(oobLabel); - const oobVal = isEq ? "0" : "1"; + const oobI1 = isEq ? "false" : "true"; this.ctx.emitBr(endLabel); this.ctx.emitLabel(negLabel); - const negVal = isEq ? "0" : "1"; + const negI1 = isEq ? "false" : "true"; this.ctx.emitBr(endLabel); this.ctx.emitLabel(endLabel); - const resultI32 = this.ctx.nextTemp(); + // Merge as i1 so the branch-condition fast path can return directly. + const resultI1 = this.ctx.nextTemp(); this.ctx.emit( - `${resultI32} = phi i32 [${validI32}, %${cmpLabel}], [${oobVal}, %${oobLabel}], [${negVal}, %${negLabel}]`, + `${resultI1} = phi i1 [${validCmp}, %${cmpLabel}], [${oobI1}, %${oobLabel}], [${negI1}, %${negLabel}]`, ); + + // If a branch condition wants the raw i1, return it directly — avoids + // the round-trip through i32 → double → fcmp-against-zero. + if (getWantsI1()) { + setWantsI1(false); + this.ctx.setVariableType(resultI1, "i1"); + return resultI1; + } + + const resultI32 = this.ctx.nextTemp(); + this.ctx.emit(`${resultI32} = zext i1 ${resultI1} to i32`); const result = this.ctx.nextTemp(); this.ctx.emit(`${result} = sitofp i32 ${resultI32} to double`); this.ctx.setVariableType(result, "double"); @@ -798,6 +808,17 @@ export class BinaryExpressionGenerator { const rhs = swapped ? byteVal : `${literalVal}`; const cmpResult = this.ctx.emitIcmp(icmpPred, "i8", lhs, rhs); + // Fast path: if the enclosing branch wants an i1 directly (e.g. 
the + // comparison is an `if (flags[p] === 1)` guard), return the raw boolean + // instead of round-tripping through i32 → double → fcmp-against-zero. + // The previous unconditional widening cost ~3 extra instructions per + // iteration on integer-heavy hot loops like the sieve's marking pass. + if (getWantsI1()) { + setWantsI1(false); + this.ctx.setVariableType(cmpResult, "i1"); + return cmpResult; + } + const i32Result = this.ctx.nextTemp(); this.ctx.emit(`${i32Result} = zext i1 ${cmpResult} to i32`); const result = this.ctx.nextTemp(); diff --git a/src/codegen/statements/control-flow.ts b/src/codegen/statements/control-flow.ts index a57a8742..53aed991 100644 --- a/src/codegen/statements/control-flow.ts +++ b/src/codegen/statements/control-flow.ts @@ -8,6 +8,7 @@ import { MemberAccessNode, VariableNode, BinaryNode, + UnaryNode, InterfaceDeclaration, MethodCallNode, InterfaceField, @@ -122,12 +123,61 @@ export class ControlFlowGenerator { ) { return false; } - const lt = bin.left.type; - const rt = bin.right.type; - if (lt === "binary" || rt === "binary") return false; - if (lt === "call" || rt === "call") return false; - if (lt === "method_call" || rt === "method_call") return false; - if (lt === "conditional" || rt === "conditional") return false; + if (!this.isPureSubexprForBranch(bin.left)) return false; + if (!this.isPureSubexprForBranch(bin.right)) return false; + return true; + } + + // A "pure sub-expression" for the branch-condition fast path is any + // expression that does NOT itself consume the global `wantsI1` flag, i.e. + // no nested comparisons, no logical ops, no conditionals, no calls. + // Simple arithmetic (mul, add, sub, shifts), variable reads, member/index + // accesses, and numeric literals are all safe — they produce numeric + // values without touching the i1 propagation state, so the OUTER binary + // comparison still cleanly consumes `wantsI1` at the end. 
//
// History: the earlier check rejected ANY nested binary operand, so loop
// guards such as `while (p * p <= LIMIT)` dropped to the slow path that
// round-trips the comparison result through `i32 → double → fcmp`.
//
// Returns true when `expr` may appear as an operand of a branch-condition
// comparison that takes the i1 fast path, false otherwise.
private isPureSubexprForBranch(expr: Expression): boolean {
  switch (expr.type) {
    // Calls, conditionals, arrow functions and awaits are never accepted
    // as operands of a fast-path comparison.
    case "call":
    case "method_call":
    case "conditional":
    case "arrow_function":
    case "await":
      return false;

    case "binary": {
      const node = expr as BinaryNode;
      // Comparison and logical operators consume wantsI1 themselves, so a
      // nested one disqualifies the whole operand.
      const flagConsumingOps = [
        "<",
        ">",
        "<=",
        ">=",
        "==",
        "!=",
        "===",
        "!==",
        "&&",
        "||",
        "??",
      ];
      if (flagConsumingOps.includes(node.op)) {
        return false;
      }
      // Any other binary operator (arithmetic, shifts, ...) is acceptable
      // only if both of its operands are acceptable too.
      return (
        this.isPureSubexprForBranch(node.left) &&
        this.isPureSubexprForBranch(node.right)
      );
    }

    case "unary": {
      const node = expr as UnaryNode;
      // `!x` would consume wantsI1 via its inner comparison, so it is
      // rejected; every other unary operator defers to its operand.
      return node.op !== "!" && this.isPureSubexprForBranch(node.operand);
    }

    default:
      // Every remaining node kind (number/string/boolean/null literals,
      // variable reads, member_access, index_access) is treated as pure
      // from the i1-propagation standpoint.
      // NOTE(review): the operands of these nodes (e.g. the index of an
      // index_access) are not inspected here — confirm that none of them
      // can reach a wantsI1 consumer.
      return true;
  }
}