From b4cfb707134bb2225411d554a05408aa42d04a3d Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 11 Feb 2026 00:19:51 +0000 Subject: [PATCH 1/2] Fix: Parse #e prefixed decimals as exact Rational/BigInt (#6) This commit modifies the number parser to correctly handle the `#e` exactness prefix when applied to decimal numbers (e.g. `#e1.223`) or scientific notation (e.g. `#e1.2e2`). Previously, these were parsed as inexact floating point numbers, violating R7RS which states that `#e` must force exactness. Changes: - Added `parseDecimalAsExact` helper in `src/core/interpreter/reader/number_parser.js` to parse decimal strings into `Rational` or `BigInt` using BigInt arithmetic. - Updated `parsePrefixedNumber` to use this new helper when `exactness` is 'exact'. - Added error handling to throw `SchemeReadError` for `#e+inf.0` / `#e+nan.0`, as these cannot be exact. - Added comprehensive unit tests in `tests/core/interpreter/reader/number_parser_tests.js`. Verified with: - New unit tests passing. - Chibi compliance tests passing (including numeric syntax). - Full regression suite passing. Co-authored-by: mark-friedman <1743690+mark-friedman@users.noreply.github.com> --- package-lock.json | 4 +- src/core/interpreter/reader/number_parser.js | 54 +++++++++++++++++++ .../interpreter/reader/number_parser_tests.js | 44 +++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1022490..940e4c7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "scheme-js-4", - "version": "0.1.0", + "version": "1.0.0-beta", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "scheme-js-4", - "version": "0.1.0", + "version": "1.0.0-beta", "devDependencies": { "rollup": "^4.54.0" } diff --git a/src/core/interpreter/reader/number_parser.js b/src/core/interpreter/reader/number_parser.js index e47d4b8..372e92c 100644 --- a/src/core/interpreter/reader/number_parser.js +++ b/src/core/interpreter/reader/number_parser.js @@ -141,6 +141,48 @@ export function parseNumber(token, exactness) { return null; // Not a number } +/** + * Parses a decimal string (with optional exponent) as an exact Rational or BigInt. + * Used when #e prefix is present. + * @param {string} str + * @returns {Rational|bigint} + */ +function parseDecimalAsExact(str) { + const lower = str.toLowerCase(); + if (lower.includes('inf.0') || lower.includes('nan.0')) { + throw new SchemeReadError('exactness prefix #e cannot be used with infinities or NaN', 'read'); + } + + // Normalize exponent + const normalized = str.replace(/[sSfFdDlL](?=[+-]?\d)/g, 'e'); + + // Split into coefficient and exponent + // Use 'e' or 'E' (normalized above to 'e') + let [coeffStr, expStr] = normalized.toLowerCase().split('e'); + let exp = expStr ? BigInt(expStr) : 0n; + + // Handle decimal point in coefficient + let fractionalDigits = 0; + if (coeffStr.includes('.')) { + const parts = coeffStr.split('.'); + fractionalDigits = parts[1].length; + coeffStr = parts[0] + parts[1]; + } + + let coeff = BigInt(coeffStr); + + // Effective exponent = explicit exponent - number of fractional digits + // value = coeff * 10^(exp - fractionalDigits) + let effectiveExp = exp - BigInt(fractionalDigits); + + if (effectiveExp >= 0n) { + return coeff * (10n ** effectiveExp); + } else { + const denominator = 10n ** (-effectiveExp); + return new Rational(coeff, denominator); + } +} + /** * Parses a number with R7RS prefix notation. * Handles #x (hex), #o (octal), #b (binary), #d (decimal), #e (exact), #i (inexact) @@ -243,9 +285,15 @@ export function parsePrefixedNumber(token) { // Handle special values: +inf.0, -inf.0, +nan.0, -nan.0 (case-insensitive) const lowerRest = rest.toLowerCase(); if (/^[+-]?inf\.0$/.test(lowerRest)) { + if (exactness === 'exact') { + throw new SchemeReadError('exactness prefix #e cannot be used with infinities or NaN', 'read'); + } return lowerRest.startsWith('-') ? -Infinity : Infinity; } if (/^[+-]?nan\.0$/.test(lowerRest)) { + if (exactness === 'exact') { + throw new SchemeReadError('exactness prefix #e cannot be used with infinities or NaN', 'read'); + } return NaN; } @@ -255,6 +303,12 @@ export function parsePrefixedNumber(token) { if (!/^[+-]?(\d+(\.\d*)?|\.\d+)([eEsSfFdDlL][+-]?\d+)?$/.test(rest)) { return null; } + + // Exact decimal parsing + if (exactness === 'exact') { + return parseDecimalAsExact(rest); + } + const normalized = rest.replace(/[sSfFdDlL](?=[+-]?\d)/g, 'e'); result = parseFloat(normalized); } else { diff --git a/tests/core/interpreter/reader/number_parser_tests.js b/tests/core/interpreter/reader/number_parser_tests.js index 006a439..9987bdf 100644 --- a/tests/core/interpreter/reader/number_parser_tests.js +++ b/tests/core/interpreter/reader/number_parser_tests.js @@ -113,6 +113,50 @@ export function runNumberParserTests(logger) { assert(logger, 'decimal inf', parsePrefixedNumber('#d+inf.0'), Infinity); assert(logger, 'decimal nan', Number.isNaN(parsePrefixedNumber('#d+nan.0')), true); + + logger.title('parsePrefixedNumber - exact decimal (Issue #6)'); + + { + const r = parsePrefixedNumber('#e1.223'); + assert(logger, '#e1.223 is Rational', r instanceof Rational, true); + assert(logger, '#e1.223 numerator', r.numerator, 1223n); + assert(logger, '#e1.223 denominator', r.denominator, 1000n); + assert(logger, '#e1.223 is exact', r.exact, true); + } + + { + const r = parsePrefixedNumber('#e1.2e2'); + assert(logger, '#e1.2e2 is BigInt', typeof r === 'bigint', true); + assert(logger, '#e1.2e2 value', r, 120n); + } + + { + const r = parsePrefixedNumber('#e1.2e-1'); + assert(logger, '#e1.2e-1 is Rational', r instanceof Rational, true); + assert(logger, '#e1.2e-1 numerator', r.numerator, 3n); // 12/100 -> 3/25 + assert(logger, '#e1.2e-1 denominator', r.denominator, 25n); + } + + // Different exponent markers + assert(logger, '#e1s2', parsePrefixedNumber('#e1s2'), 100n); + assert(logger, '#e1f2', parsePrefixedNumber('#e1f2'), 100n); + assert(logger, '#e1d2', parsePrefixedNumber('#e1d2'), 100n); + assert(logger, '#e1l2', parsePrefixedNumber('#e1l2'), 100n); + + // Error cases + try { + parsePrefixedNumber('#e+inf.0'); + logger.fail('Should throw error for #e+inf.0'); + } catch (e) { + assert(logger, 'Error for #e+inf.0', e.message.includes('exactness'), true); + } + + try { + parsePrefixedNumber('#e+nan.0'); + logger.fail('Should throw error for #e+nan.0'); + } catch (e) { + assert(logger, 'Error for #e+nan.0', e.message.includes('exactness'), true); + } } export default runNumberParserTests; From 8947e01acef327ea67e7940d10783142c2e97314 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 11 Feb 2026 00:34:18 +0000 Subject: [PATCH 2/2] Refactor number parser for readability - Split `parsePrefixedNumber` into smaller helper functions: - `parsePrefixes` - `parseRationalWithRadix` - `parseComplexWithRadix` - `parseRealWithRadix` - This addresses code review feedback regarding function length and complexity. - Verified with unit tests. Co-authored-by: mark-friedman <1743690+mark-friedman@users.noreply.github.com> --- src/core/interpreter/reader/number_parser.js | 200 ++++++++++++------- 1 file changed, 130 insertions(+), 70 deletions(-) diff --git a/src/core/interpreter/reader/number_parser.js b/src/core/interpreter/reader/number_parser.js index 372e92c..3a908a2 100644 --- a/src/core/interpreter/reader/number_parser.js +++ b/src/core/interpreter/reader/number_parser.js @@ -184,14 +184,12 @@ function parseDecimalAsExact(str) { } /** - * Parses a number with R7RS prefix notation. - * Handles #x (hex), #o (octal), #b (binary), #d (decimal), #e (exact), #i (inexact) - * and combinations like #e#x10 or #x#e10 - * @param {string} token - Token starting with # - * @returns {number|bigint|Rational|null} + * Extracts R7RS prefixes (#e, #i, #b, #o, #d, #x) from the token. + * @param {string} token + * @returns {{exactness: string|null, radix: number, rest: string}} */ -export function parsePrefixedNumber(token) { - let exactness = null; // 'exact', 'inexact', or null +function parsePrefixes(token) { + let exactness = null; let radix = 10; let rest = token; @@ -224,99 +222,117 @@ export function parsePrefixedNumber(token) { rest = rest.substring(2); break; default: - return null; // Not a numeric prefix + // Not a known numeric prefix, stop parsing prefixes + // This might happen for things like #\char if passed here, + // though parsePrefixedNumber should only be called for numbers. + return { exactness, radix, rest }; } } + return { exactness, radix, rest }; +} - // If still starts with #, it's not a valid number - if (rest.startsWith('#')) { - return null; - } - - // Normalize alternative exponent markers for decimal numbers - if (radix === 10 && /^[+-]?(\d+\.?\d*|\.\d+)[sSfFdDlL][+-]?\d+$/.test(rest)) { - rest = rest.replace(/[sSfFdDlL]/, 'e'); - } +/** + * Attempts to parse a rational number string with a given radix. + * @param {string} str + * @param {number} radix + * @param {string|null} exactness + * @returns {number|Rational|null} + */ +function parseRationalWithRadix(str, radix, exactness) { + const rationalMatch = str.match(/^([+-]?[0-9a-fA-F]+)\/([0-9a-fA-F]+)$/); + if (!rationalMatch) return null; - // Handle rational with radix: #x10/2 means 16/2 = 8 - const rationalMatch = rest.match(/^([+-]?[0-9a-fA-F]+)\/([0-9a-fA-F]+)$/); - if (rationalMatch) { - const num = BigInt(parseInt(rationalMatch[1], radix)); - const den = BigInt(parseInt(rationalMatch[2], radix)); - if (den === 0n) throw new SchemeReadError('division by zero', 'rational'); + const num = BigInt(parseInt(rationalMatch[1], radix)); + const den = BigInt(parseInt(rationalMatch[2], radix)); + if (den === 0n) throw new SchemeReadError('division by zero', 'rational'); - if (exactness === 'inexact') { - return Number(num) / Number(den); - } - const rat = new Rational(num, den); - return rat; + if (exactness === 'inexact') { + return Number(num) / Number(den); } + const rat = new Rational(num, den); + return rat; +} - // Handle complex with radix: #d10+11i - const complexMatch = rest.match(/^([+-]?[0-9a-fA-F.]+)([+-])([0-9a-fA-F.]+)?i$/); - if (complexMatch) { - const parsePart = (str) => { - if (!str) return 0n; - if (radix === 10 && (str.includes('.') || str.toLowerCase().includes('e'))) return parseFloat(str); - return BigInt(parseInt(str, radix)); - }; +/** + * Attempts to parse a complex number string with a given radix. + * @param {string} str + * @param {number} radix + * @param {string|null} exactness + * @returns {Complex|null} + */ +function parseComplexWithRadix(str, radix, exactness) { + const complexMatch = str.match(/^([+-]?[0-9a-fA-F.]+)([+-])([0-9a-fA-F.]+)?i$/); + if (!complexMatch) return null; + + const parsePart = (s) => { + if (!s) return 0n; + if (radix === 10 && (s.includes('.') || s.toLowerCase().includes('e'))) return parseFloat(s); + return BigInt(parseInt(s, radix)); + }; - const realPart = parsePart(complexMatch[1]); - const sign = complexMatch[2] === '-' ? -1 : 1; - const imagStr = complexMatch[3] || '1'; - let imagPart = parsePart(imagStr); + const realPart = parsePart(complexMatch[1]); + const sign = complexMatch[2] === '-' ? -1 : 1; + const imagStr = complexMatch[3] || '1'; + let imagPart = parsePart(imagStr); - // Apply sign - if (sign === -1) { - if (typeof imagPart === 'bigint') imagPart = -imagPart; - else imagPart = -imagPart; - } + // Apply sign + if (sign === -1) { + if (typeof imagPart === 'bigint') imagPart = -imagPart; + else imagPart = -imagPart; + } - const isResultExact = exactness === 'exact' ? true : - (exactness === 'inexact' ? false : - (typeof realPart !== 'number' && typeof imagPart !== 'number')); + const isResultExact = exactness === 'exact' ? true : + (exactness === 'inexact' ? false : + (typeof realPart !== 'number' && typeof imagPart !== 'number')); - return new Complex(realPart, imagPart, isResultExact); - } + return new Complex(realPart, imagPart, isResultExact); +} - // Parse as integer in the given radix +/** + * Parses a real number (integer or decimal) with a given radix. + * @param {string} str + * @param {number} radix + * @param {string|null} exactness + * @returns {number|bigint|Rational|null} + */ +function parseRealWithRadix(str, radix, exactness) { let result; // Handle special values: +inf.0, -inf.0, +nan.0, -nan.0 (case-insensitive) - const lowerRest = rest.toLowerCase(); - if (/^[+-]?inf\.0$/.test(lowerRest)) { + const lowerStr = str.toLowerCase(); + if (/^[+-]?inf\.0$/.test(lowerStr)) { if (exactness === 'exact') { throw new SchemeReadError('exactness prefix #e cannot be used with infinities or NaN', 'read'); } - return lowerRest.startsWith('-') ? -Infinity : Infinity; + return lowerStr.startsWith('-') ? -Infinity : Infinity; } - if (/^[+-]?nan\.0$/.test(lowerRest)) { + if (/^[+-]?nan\.0$/.test(lowerStr)) { if (exactness === 'exact') { throw new SchemeReadError('exactness prefix #e cannot be used with infinities or NaN', 'read'); } return NaN; } - if (radix === 10 && (rest.includes('.') || rest.toLowerCase().includes('e') || rest.toLowerCase().includes('s') || rest.toLowerCase().includes('f') || rest.toLowerCase().includes('d') || rest.toLowerCase().includes('l'))) { + if (radix === 10 && (str.includes('.') || str.toLowerCase().includes('e') || str.toLowerCase().includes('s') || str.toLowerCase().includes('f') || str.toLowerCase().includes('d') || str.toLowerCase().includes('l'))) { // Decimal with fractional part or exponent // Validate strict format: optional sign, digits, optional dot, optional digits, optional exponent - if (!/^[+-]?(\d+(\.\d*)?|\.\d+)([eEsSfFdDlL][+-]?\d+)?$/.test(rest)) { + if (!/^[+-]?(\d+(\.\d*)?|\.\d+)([eEsSfFdDlL][+-]?\d+)?$/.test(str)) { return null; } // Exact decimal parsing if (exactness === 'exact') { - return parseDecimalAsExact(rest); + return parseDecimalAsExact(str); } - const normalized = rest.replace(/[sSfFdDlL](?=[+-]?\d)/g, 'e'); + const normalized = str.replace(/[sSfFdDlL](?=[+-]?\d)/g, 'e'); result = parseFloat(normalized); } else { // Integer in given radix -> BigInt (exact) // Verify chars const validChars = '0123456789abcdefghijklmnopqrstuvwxyz'.slice(0, radix); - const checkRest = rest.replace(/^[+-]/, '').toLowerCase(); - for (const char of checkRest) { + const checkStr = str.replace(/^[+-]/, '').toLowerCase(); + for (const char of checkStr) { if (!validChars.includes(char)) return null; } @@ -325,31 +341,75 @@ export function parsePrefixedNumber(token) { if (radix !== 10) { const prefix = radix === 16 ? '0x' : (radix === 8 ? '0o' : '0b'); try { - if (rest.startsWith('-')) { - result = -BigInt(prefix + rest.slice(1)); - } else if (rest.startsWith('+')) { - result = BigInt(prefix + rest.slice(1)); + if (str.startsWith('-')) { + result = -BigInt(prefix + str.slice(1)); + } else if (str.startsWith('+')) { + result = BigInt(prefix + str.slice(1)); } else { - result = BigInt(prefix + rest); + result = BigInt(prefix + str); } } catch (e) { return null; } } else { try { - result = BigInt(rest); + result = BigInt(str); } catch (e) { return null; } } } - // BigInt can never be NaN; only check for Number - if (typeof result === 'number' && isNaN(result)) { + return result; +} + +/** + * Parses a number with R7RS prefix notation. + * Handles #x (hex), #o (octal), #b (binary), #d (decimal), #e (exact), #i (inexact) + * and combinations like #e#x10 or #x#e10 + * @param {string} token - Token starting with # + * @returns {number|bigint|Rational|null} + */ +export function parsePrefixedNumber(token) { + const { exactness, radix, rest } = parsePrefixes(token); + + // If still starts with #, it's not a valid number (e.g. invalid prefix combo) + if (rest.startsWith('#')) { return null; } - // Apply exactness + // Normalize alternative exponent markers for decimal numbers + let workingStr = rest; + if (radix === 10 && /^[+-]?(\d+\.?\d*|\.\d+)[sSfFdDlL][+-]?\d+$/.test(rest)) { + workingStr = rest.replace(/[sSfFdDlL]/, 'e'); + } + + // 1. Try Rational + const rational = parseRationalWithRadix(workingStr, radix, exactness); + if (rational !== null) return rational; + + // 2. Try Complex + const complex = parseComplexWithRadix(workingStr, radix, exactness); + if (complex !== null) return complex; + + // 3. Try Real (Integer/Decimal) + const result = parseRealWithRadix(workingStr, radix, exactness); + + // BigInt can never be NaN; only check for Number + if (typeof result === 'number' && Number.isNaN(result)) { + // If it's NaN, we must ensure it was explicitly parsed as NaN, otherwise it's a failure (e.g. invalid string) + // parseRealWithRadix handles 'nan.0' explicitly. + // However, parseFloat('foo') returns NaN. + // We only want to return NaN if the input string *looks* like a NaN. + if (/^[+-]?nan\.0$/i.test(rest)) { + return NaN; + } + return null; + } + + if (result === null) return null; + + // Apply exactness finalization if (exactness === 'inexact') { // Force to Number (inexact) return typeof result === 'bigint' ? Number(result) : result;