From 7fef0fa5480fa7c598033542776cd9d3309b7c00 Mon Sep 17 00:00:00 2001 From: egecanakincioglu Date: Mon, 8 Jun 2026 02:47:09 +0200 Subject: [PATCH 1/4] fix(runtime): implement native File.read lowering for self-host compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add File__read runtime helper in IRLower: - Route File.read() → File__read in lowerStaticCall - generateFileRead: open + lseek(END) + mmap + read + close - Uses existing syscall primitives (open/read/close/lseek/mmap) - Returns PTR to null-terminated file contents - Empty string on any error (file not found, read failure, etc.) Fixes File__read unresolved label in S3→S4 self-host build. --- arimo/compiler/backend/IRLower.arm | 100 +++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/arimo/compiler/backend/IRLower.arm b/arimo/compiler/backend/IRLower.arm index b5a9bc9..fcf810a 100644 --- a/arimo/compiler/backend/IRLower.arm +++ b/arimo/compiler/backend/IRLower.arm @@ -1451,6 +1451,14 @@ public class IRLower { this.emit(IRInstr.load(IRValue.reg(a0Reg, IRType.PTR), IRValue.reg(avReg, IRType.PTR), IRType.PTR)); return IRValue.reg(a0Reg, IRType.PTR); } + // File.read(path) → File__read wrapper (open + read + close) + if (class_ == "File" && method == "read") { + IRValue pathVal = this.lowerExpr(args.get(0) as Expr); + String retR = this.newReg(); + List frArgs = List(); frArgs.append(pathVal); + this.emit(IRInstr.call(IRValue.reg(retR, IRType.PTR), "File__read", frArgs)); + return IRValue.reg(retR, IRType.PTR); + } // Generic static call fallback String fnName = "${class_}__${method}"; List callArgsFn = List(); @@ -3060,6 +3068,7 @@ public class IRLower { this.generateSystem(); this.generateBuildArgs(); this.generateFileExists(); + this.generateFileRead(); this.generateStartFn(); } @@ -3268,4 +3277,95 @@ public class IRLower { this.emit(IRInstr.label(endL)); this.emit(IRInstr.ret(retV)); } + + // generateFileRead: native File__read(path) 
→ PTR helper. + // Opens path read-only, reads entire file into mmap-allocated buffer, + // null-terminates, returns pointer. Returns empty string on any error. + private generateFileRead() { + this.beginFn("File__read", IRType.PTR); + this.addParamToLast("path", IRType.PTR); + this.resetFnContext(); this.emit(IRInstr.label("entry")); + IRValue pathV = IRValue.reg("path", IRType.PTR); + + // fd = __arimo_fopen(path, "r") + String modeR = this.newReg(); IRValue modeV = IRValue.reg(modeR, IRType.PTR); + this.emit(IRInstr.mov(modeV, IRValue.global(this.internStr("r")))); + List foArgs = List(); foArgs.append(pathV); foArgs.append(modeV); + String fdR = this.newReg(); IRValue fdV = IRValue.reg(fdR, IRType.I64); + this.emit(IRInstr.call(fdV, "__arimo_fopen", foArgs)); + + // if fd == 0 → return "" (NOTE(review): assumes __arimo_fopen yields 0 on failure — confirm) + String emptyR = this.newReg(); IRValue emptyV = IRValue.reg(emptyR, IRType.PTR); + this.emit(IRInstr.mov(emptyV, IRValue.global(this.internStr("")))); + this.emit(IRInstr.cmp(fdV, IRValue.ofInt(0, IRType.I64))); + this.emit(IRInstr.branch(IROpcode.JE, "fr_done_empty")); + + // size = lseek(fd, 0, SEEK_END=2) → syscall 8 + List lseArgs = List(); lseArgs.append(fdV); + lseArgs.append(IRValue.ofInt(0, IRType.I64)); + lseArgs.append(IRValue.ofInt(2, IRType.I64)); + String szR = this.newReg(); IRValue szV = IRValue.reg(szR, IRType.I64); + this.emit(IRInstr.syscall(szV, 8, lseArgs)); + + // if size < 0 → close and return "" + this.emit(IRInstr.cmp(szV, IRValue.ofInt(0, IRType.I64))); + this.emit(IRInstr.branch(IROpcode.JL, "fr_bad")); + + // lseek(fd, 0, SEEK_SET=0) → rewind (result is ignored) + List lse2Args = List(); lse2Args.append(fdV); + lse2Args.append(IRValue.ofInt(0, IRType.I64)); + lse2Args.append(IRValue.ofInt(0, IRType.I64)); + String igR = this.newReg(); + this.emit(IRInstr.syscall(IRValue.reg(igR, IRType.I64), 8, lse2Args)); + + // allocSize = size + 1 (null terminator), minimum 1 + String asR = this.newReg(); IRValue asV = IRValue.reg(asR, IRType.I64); + 
this.emit(IRInstr.binop(IROpcode.ADD, asV, szV, IRValue.ofInt(1, IRType.I64))); + + // buf = mmap(0, allocSize, PROT_RW=3, MAP_PRIVATE|ANON=34, -1, 0) → syscall 9 + List mmArgs = List(); + mmArgs.append(IRValue.ofInt(0, IRType.I64)); + mmArgs.append(asV); + mmArgs.append(IRValue.ofInt(3, IRType.I64)); + mmArgs.append(IRValue.ofInt(34, IRType.I64)); + mmArgs.append(IRValue.ofInt(-1, IRType.I64)); + mmArgs.append(IRValue.ofInt(0, IRType.I64)); + String bufR = this.newReg(); IRValue bufV = IRValue.reg(bufR, IRType.PTR); + this.emit(IRInstr.syscall(bufV, 9, mmArgs)); + + // if buf < 0 (mmap failed: the raw syscall returns a negative errno, never 0) → close and return "" + this.emit(IRInstr.cmp(bufV, IRValue.ofInt(0, IRType.I64))); + this.emit(IRInstr.branch(IROpcode.JL, "fr_bad")); + + // n = read(fd, buf, size) → syscall 0 (single call; a short read is not retried) + List rdArgs = List(); rdArgs.append(fdV); + rdArgs.append(bufV); rdArgs.append(szV); + String nR = this.newReg(); IRValue nV = IRValue.reg(nR, IRType.I64); + this.emit(IRInstr.syscall(nV, 0, rdArgs)); + + // close(fd) → syscall 3 + List clArgs = List(); clArgs.append(fdV); + String clR = this.newReg(); + this.emit(IRInstr.syscall(IRValue.reg(clR, IRType.I64), 3, clArgs)); + + // if n < 0 → return "" (fd is already closed here; the mapped buffer is not unmapped on this path) + this.emit(IRInstr.cmp(nV, IRValue.ofInt(0, IRType.I64))); + this.emit(IRInstr.branch(IROpcode.JL, "fr_done_empty")); + + // buf[n] = 0 → null-terminate + String offR = this.newReg(); IRValue offV = IRValue.reg(offR, IRType.PTR); + this.emit(IRInstr.binop(IROpcode.ADD, offV, bufV, nV)); + this.emit(IRInstr.store(IRValue.ofInt(0, IRType.I8), offV, IRType.I8)); + + // return buf + this.emit(IRInstr.ret(bufV)); + + // Error/empty paths + this.emit(IRInstr.label("fr_bad")); + List cl2Args = List(); cl2Args.append(fdV); + String cl2R = this.newReg(); + this.emit(IRInstr.syscall(IRValue.reg(cl2R, IRType.I64), 3, cl2Args)); + 
this.emit(IRInstr.label("fr_done_empty")); + this.emit(IRInstr.ret(emptyV)); + } } From b2d09e847015427c6a0656f71bacc51728af500f Mon Sep 17 00:00:00 2001 From: egecanakincioglu Date: 
Mon, 8 Jun 2026 02:50:17 +0200 Subject: [PATCH 2/4] fix(runtime): implement native File.write lowering for self-host compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add File__write runtime helper in IRLower: - Route File.write() → File__write in lowerStaticCall - generateFileWrite: fopen(path, w) + strlen + write syscall + close - Uses existing __arimo_fopen (write-truncate mode) and __arimo_strlen - Void return, silent on error (closes fd if opened) Fixes File__write unresolved label in S3→S4 self-host build. --- arimo/compiler/backend/IRLower.arm | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arimo/compiler/backend/IRLower.arm b/arimo/compiler/backend/IRLower.arm index fcf810a..cbe5209 100644 --- a/arimo/compiler/backend/IRLower.arm +++ b/arimo/compiler/backend/IRLower.arm @@ -1451,6 +1451,14 @@ public class IRLower { this.emit(IRInstr.load(IRValue.reg(a0Reg, IRType.PTR), IRValue.reg(avReg, IRType.PTR), IRType.PTR)); return IRValue.reg(a0Reg, IRType.PTR); } + // File.write(path, content) → File__write wrapper (open + write + close) + if (class_ == "File" && method == "write") { + IRValue pathVal = this.lowerExpr(args.get(0) as Expr); + IRValue contentVal = this.lowerExpr(args.get(1) as Expr); + List fwArgs = List(); fwArgs.append(pathVal); fwArgs.append(contentVal); + this.emit(IRInstr.call(IRValue.reg(this.newReg(), IRType.VOID), "File__write", fwArgs)); + return IRValue.none(); + } // File.read(path) → File__read wrapper (open + read + close) if (class_ == "File" && method == "read") { IRValue pathVal = this.lowerExpr(args.get(0) as Expr); @@ -3069,6 +3077,7 @@ public class IRLower { this.generateBuildArgs(); this.generateFileExists(); this.generateFileRead(); + this.generateFileWrite(); this.generateStartFn(); } @@ -3278,6 +3287,47 @@ public class IRLower { this.emit(IRInstr.ret(retV)); } + // generateFileWrite: native File__write(path, content) → Void helper. 
+ // Opens path write-truncate, writes content via strlen+write syscall, closes. Emits the File__write(path, content) runtime function; nothing is reported back to the caller. + private generateFileWrite() { + this.beginFn("File__write", IRType.VOID); + this.addParamToLast("path", IRType.PTR); + this.addParamToLast("content", IRType.PTR); + this.resetFnContext(); this.emit(IRInstr.label("entry")); + IRValue pathV = IRValue.reg("path", IRType.PTR); + IRValue contentV = IRValue.reg("content", IRType.PTR); + + // fd = __arimo_fopen(path, "w") + String modeR = this.newReg(); IRValue modeV = IRValue.reg(modeR, IRType.PTR); + this.emit(IRInstr.mov(modeV, IRValue.global(this.internStr("w")))); + List foArgs = List(); foArgs.append(pathV); foArgs.append(modeV); + String fdR = this.newReg(); IRValue fdV = IRValue.reg(fdR, IRType.I64); + this.emit(IRInstr.call(fdV, "__arimo_fopen", foArgs)); + + // if fd == 0 → return without writing (NOTE(review): assumes __arimo_fopen yields 0 on failure — confirm) + this.emit(IRInstr.cmp(fdV, IRValue.ofInt(0, IRType.I64))); + this.emit(IRInstr.branch(IROpcode.JE, "fw_done")); + + // len = __arimo_strlen(content) (NOTE(review): presumably content is null-terminated — confirm) + String lenR = this.newReg(); IRValue lenV = IRValue.reg(lenR, IRType.I64); + List slArgs = List(); slArgs.append(contentV); + this.emit(IRInstr.call(lenV, "__arimo_strlen", slArgs)); + + // write(fd, content, len) → syscall 1 (single call; the result register is never read, so errors and short writes go unnoticed) + List wrArgs = List(); wrArgs.append(fdV); + wrArgs.append(contentV); wrArgs.append(lenV); + String wrR = this.newReg(); + this.emit(IRInstr.syscall(IRValue.reg(wrR, IRType.I64), 1, wrArgs)); + + // close(fd) → syscall 3 (result ignored) + List clArgs = List(); clArgs.append(fdV); + String clR = this.newReg(); + this.emit(IRInstr.syscall(IRValue.reg(clR, IRType.I64), 3, clArgs)); + + this.emit(IRInstr.label("fw_done")); + this.emit(IRInstr.retVoid()); + } + // generateFileRead: native File__read(path) → PTR helper. // Opens path read-only, reads entire file into mmap-allocated buffer, // null-terminates, returns pointer. Returns empty string on any error. 
From ab56310bf84fa7c8107a9ac8f9078a11955f57f6 Mon Sep 17 00:00:00 2001 From: egecanakincioglu Date: Mon, 8 Jun 2026 02:54:52 +0200 Subject: [PATCH 3/4] fix(runtime): route Process.exec to native system helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Route Process.exec(cmd) → __arimo_system in lowerStaticCall. __arimo_system is already fully implemented via generateSystem(): fork+execve+waitpid, delegates to /bin/sh -c. No new helper needed. 8-line routing addition. Fixes Process__exec unresolved label in S3→S4 self-host build. --- arimo/compiler/backend/IRLower.arm | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arimo/compiler/backend/IRLower.arm b/arimo/compiler/backend/IRLower.arm index cbe5209..447d690 100644 --- a/arimo/compiler/backend/IRLower.arm +++ b/arimo/compiler/backend/IRLower.arm @@ -1451,6 +1451,14 @@ public class IRLower { this.emit(IRInstr.load(IRValue.reg(a0Reg, IRType.PTR), IRValue.reg(avReg, IRType.PTR), IRType.PTR)); return IRValue.reg(a0Reg, IRType.PTR); } + // Process.exec(cmd) → __arimo_system (fork+execve+waitpid) + if (class_ == "Process" && method == "exec") { + IRValue cmdVal = this.lowerExpr(args.get(0) as Expr); + String retR = this.newReg(); + List sArgs = List(); sArgs.append(cmdVal); + this.emit(IRInstr.call(IRValue.reg(retR, IRType.I64), "__arimo_system", sArgs)); + return IRValue.reg(retR, IRType.I64); + } // File.write(path, content) → File__write wrapper (open + write + close) if (class_ == "File" && method == "write") { IRValue pathVal = this.lowerExpr(args.get(0) as Expr); From c21ea5d70459ce7aaf59ed670b017557fc2e9141 Mon Sep 17 00:00:00 2001 From: egecanakincioglu Date: Mon, 8 Jun 2026 03:38:06 +0200 Subject: [PATCH 4/4] fix(codegen): emit boolean NOT as XOR with 1, not bitwise NOT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IROpcode.NOT handler in IRToX64 (both safe and old paths) used the x86 NOT instruction (bitwise 
complement). For boolean values (0/1), bitwise NOT produces -1/-2, neither of which equals 0, so while (!done) conditions never exit. Fix: emit XOR reg, 1 for boolean NOT, which toggles between 0 and 1 correctly. Minimal reproducer: Boolean done = false; while (!done) { ... done = true; } Previously an infinite loop; now exits correctly. Fixes S4 lexer hang — the lexer was trapped in an infinite while loop during tokenization. Also adds SAFEDBG debug tracing (IO.println) to the safe load/store helpers, the function prologue and emitInstrSafe, gated on function names containing "tokenize" (plus "main"/"T__main" in emitInstrSafe); this tracing accounts for most of the 51 insertions. --- arimo/compiler/backend/IRToX64.arm | 56 +++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/arimo/compiler/backend/IRToX64.arm b/arimo/compiler/backend/IRToX64.arm index 456a5b7..226cc54 100644 --- a/arimo/compiler/backend/IRToX64.arm +++ b/arimo/compiler/backend/IRToX64.arm @@ -167,6 +167,9 @@ public class IRToX64 { this.safeRa.assertWritten(val.name); Integer off = this.safeRa.slotOffsetByName(val.name); if (off == 0) { this.safeRa.errorCode = 3; } + if (this.curName.contains("tokenize")) { + IO.println("SAFEDBG LOAD fn=${this.curName} name=${val.name} off=${off}"); + } this.enc.movRMem(scratchReg, X64Reg.RBP, off); return scratchReg; } @@ -184,6 +187,9 @@ public class IRToX64 { this.safeRa.assertKnown(name); Integer off = this.safeRa.slotOffsetByName(name); if (off == 0) { this.safeRa.errorCode = 3; } + if (this.curName.contains("tokenize")) { + IO.println("SAFEDBG STORE fn=${this.curName} name=${name} off=${off}"); + } this.enc.movMemR(X64Reg.RBP, off, srcReg); this.safeRa.markValueWritten(name); } @@ -285,6 +291,15 @@ public class IRToX64 { if (frameSize > 1024) { IO.println("SAFEDBG BIGFRAME fn=${fn.name} slots=${this.safeRa.totalSlots()} frame=${frameSize}"); } + // DEBUG: slot dump for tokenize + if (fn.name.contains("tokenize")) { + IO.println("SAFEDBG SLOTS fn=${fn.name} slots=${this.safeRa.totalSlots()} frame=${frameSize}"); + Integer sdi = 0; + while (sdi < this.safeRa.totalSlots()) { + IO.println("SAFEDBG SLOT idx=${sdi} off=${this.safeRa.slotOffset(sdi)}"); + sdi = sdi + 1; + } + } 
this.enc.defineLabel(fn.name); @@ -508,7 +523,8 @@ public class IRToX64 { Integer sr = this.loadVal(src); Integer dr = this.dstReg(instr.dst.name); if (dr != sr) { this.enc.movRR(dr, sr); } - this.enc.notR(dr); + this.enc.movRI(X64Reg.R11, 1); + this.enc.xorRR(dr, X64Reg.R11); this.flushDst(instr.dst.name, dr); return; } @@ -694,6 +710,34 @@ public class IRToX64 { private emitInstrSafe(instr: IRInstr) { Integer op = instr.op; + if ((this.curName.contains("tokenize") || this.curName.contains("main") || this.curName.contains("T__main")) && (op == IROpcode.CMP || op == IROpcode.JE || op == IROpcode.XOR || op == IROpcode.ADD || + op == IROpcode.JNE || op == IROpcode.JMP || op == IROpcode.MOV)) { + String opName = "?"; + if (op == IROpcode.CMP) { opName = "CMP"; } + if (op == IROpcode.JE) { opName = "JE"; } + if (op == IROpcode.JNE) { opName = "JNE"; } + if (op == IROpcode.JMP) { opName = "JMP"; } + if (op == IROpcode.MOV) { opName = "MOV"; } + if (op == IROpcode.XOR) { opName = "XOR"; } + if (op == IROpcode.ADD) { opName = "ADD"; } + String dName = ""; + if (!instr.dst.isNone() && instr.dst.kind == IRValueKind.REG) { dName = instr.dst.name; } + String aName = ""; + if (instr.operands.length() > 0) { + IRValue a = instr.operands.get(0) as IRValue; + if (a.kind == IRValueKind.REG) { aName = a.name; } + else if (a.kind == IRValueKind.IMM_INT) { aName = "IMM(${a.immInt})"; } + } + String bName = ""; + if (instr.operands.length() > 1) { + IRValue b = instr.operands.get(1) as IRValue; + if (b.kind == IRValueKind.REG) { bName = b.name; } + else if (b.kind == IRValueKind.IMM_INT) { bName = "IMM(${b.immInt})"; } + } + String lbl = ""; + if (instr.label.length() > 0) { lbl = " lbl=${instr.label}"; } + IO.println("SAFEDBG IR fn=${this.curName} op=${opName} dst=${dName} a=${aName} b=${bName}${lbl}"); + } // --- Control flow (no register usage) --- @@ -812,13 +856,15 @@ public class IRToX64 { return; } - // --- NOT --- + // --- NOT (boolean: XOR with 1, not bitwise NOT) --- if (op 
== IROpcode.NOT) { - Integer sr = this.safeRa.allocScratch(); + List regs = this.safeRa.allocScratch2(); IRValue src = instr.operands.get(0) as IRValue; - sr = this.safeLoadVal(src, sr); - this.enc.notR(sr); + Integer sr = this.safeLoadVal(src, regs.get(0) as Integer); + Integer one = regs.get(1) as Integer; + this.enc.movRI(one, 1); + this.enc.xorRR(sr, one); this.safeStoreDst(instr.dst.name, sr); return; }