diff --git a/packages/js/src/core/dot-notation-parser.ts b/packages/js/src/core/dot-notation-parser.ts index badd5ca..2af95f8 100644 --- a/packages/js/src/core/dot-notation-parser.ts +++ b/packages/js/src/core/dot-notation-parser.ts @@ -343,17 +343,38 @@ export class DotNotationParser implements ValidatableParserInterface { */ private segmentPathCache(path: string): Segment[] { if (this.pathCache !== null) { - const cached = this.pathCache.get(path); + const cacheKey = this.normalizeCacheKey(path); + const cached = this.pathCache.get(cacheKey); if (cached !== null) { return cached; } const segments = this.segmentParser.parseSegments(path); - this.pathCache.set(path, segments); + this.pathCache.set(cacheKey, segments); return segments; } return this.segmentParser.parseSegments(path); } + /** + * Normalize a path to its cache key by stripping the optional root prefix. + * + * The segment parser ignores a leading `$` (and the `.` that may follow), + * so `$.a.b`, `$a.b`, and `a.b` parse identically. Collapsing them to one + * cache key avoids storing duplicate entries for equivalent paths. + * + * @param path - Dot-notation path string. + * @returns Normalized cache key. + */ + private normalizeCacheKey(path: string): string { + if (path[0] === '$') { + path = path.slice(1); + if (path[0] === '.') { + path = path.slice(1); + } + } + return path; + } + /** * Recursively write a value at the given key path. * diff --git a/packages/js/src/parser/yaml-parser.ts b/packages/js/src/parser/yaml-parser.ts index 8db6b33..f201535 100644 --- a/packages/js/src/parser/yaml-parser.ts +++ b/packages/js/src/parser/yaml-parser.ts @@ -75,6 +75,16 @@ export class YamlParser { `YAML merge keys (<<) are not supported (line ${i + 1}).`, ); } + + // Also block merge keys used as flow-map keys ("{<<:" or ", <<:"), + // which the line-start check above would miss. The negative + // lookbehind keeps the match in genuine key position and out of + // adjacent quoted regions; rejection is fail-closed. + if (/(?= 2) { + if (key.startsWith('"') && key.endsWith('"')) { + return this.unescapeDoubleQuoted(key.slice(1, -1)); + } + if (key.startsWith("'") && key.endsWith("'")) { + return key.slice(1, -1).replace(/''/g, "'"); + } + } + + return key; + } + /** * Split flow-syntax items by comma, respecting nested brackets and quotes. * diff --git a/packages/js/src/path-query/segment-filter-parser.ts b/packages/js/src/path-query/segment-filter-parser.ts index 65a8f3c..c4e3743 100644 --- a/packages/js/src/path-query/segment-filter-parser.ts +++ b/packages/js/src/path-query/segment-filter-parser.ts @@ -212,11 +212,40 @@ export class SegmentFilterParser implements FilterEvaluatorInterface { return raw.substring(1, raw.length - 1); } - if (!isNaN(Number(raw)) && raw !== '') { - return raw.includes('.') ? parseFloat(raw) : parseInt(raw, 10); + return this.numericLiteral(raw) ?? raw; + } + + /** + * Coerce a raw token to a number using a runtime-agnostic rule. + * + * Only plain decimal integers and decimal floats (including scientific + * notation) are treated as numbers. Hex (`0x`), binary (`0b`), octal + * (`0o`), and underscore-grouped literals are intentionally left as + * strings so PHP and JS produce identical results for untrusted input. + * + * The branch is chosen by which pattern matched (not by the presence of a + * dot), so `1e3` parses as the number 1000. JS has a single number type, + * so integral and fractional values are both plain numbers — this mirrors + * the PHP side, which collapses integral values (1e3 → int 1000) so they + * compare equal to integer data under strict equality. + * + * @param raw - Raw token. + * @returns The number, or null when the token is not numeric. + */ + private numericLiteral(raw: string): number | null { + // Plain decimal integer (handled first so the float branch below + // always carries a dot or exponent). + if (/^[+-]?\d+$/.test(raw)) { + return parseInt(raw, 10); + } + + // Decimal float (with a dot, optional exponent) or an integer mantissa + // with a mandatory exponent (e.g. 1e3). Hex/binary/octal never match. + if (/^[+-]?(?:(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)$/.test(raw)) { + return parseFloat(raw); } - return raw; + return null; } /** @@ -239,19 +268,29 @@ export class SegmentFilterParser implements FilterEvaluatorInterface { const expected = condition.value; + if (condition.operator === '==') { + return fieldValue === expected; + } + if (condition.operator === '!=') { + return fieldValue !== expected; + } + + // Relational operators only compare two numbers. Any other type + // combination yields false, keeping PHP and JS identical (PHP's + // native mixed-type comparison and JS coercion diverge otherwise). + if (typeof fieldValue !== 'number' || typeof expected !== 'number') { + return false; + } + switch (condition.operator) { - case '==': - return fieldValue === expected; - case '!=': - return fieldValue !== expected; case '>': - return (fieldValue as number) > (expected as number); + return fieldValue > expected; case '<': - return (fieldValue as number) < (expected as number); + return fieldValue < expected; case '>=': - return (fieldValue as number) >= (expected as number); + return fieldValue >= expected; case '<=': - return (fieldValue as number) <= (expected as number); + return fieldValue <= expected; default: return false; } @@ -355,8 +394,11 @@ export class SegmentFilterParser implements FilterEvaluatorInterface { } const toNumber = (token: string): number | null => { - if (!token.startsWith('@') && !isNaN(Number(token)) && token !== '') { - return token.includes('.') ? parseFloat(token) : parseInt(token, 10); + if (!token.startsWith('@')) { + const literal = this.numericLiteral(token); + if (literal !== null) { + return literal; + } } const val = this.resolveFilterArg(item, token); @@ -365,8 +407,8 @@ export class SegmentFilterParser implements FilterEvaluatorInterface { return val; } - if (typeof val === 'string' && !isNaN(Number(val)) && val !== '') { - return val.includes('.') ? parseFloat(val) : parseInt(val, 10); + if (typeof val === 'string') { + return this.numericLiteral(val); } return null; diff --git a/packages/js/tests/core/dot-notation-parser-edge-cases.test.ts b/packages/js/tests/core/dot-notation-parser-edge-cases.test.ts index 3441da9..c693d57 100644 --- a/packages/js/tests/core/dot-notation-parser-edge-cases.test.ts +++ b/packages/js/tests/core/dot-notation-parser-edge-cases.test.ts @@ -44,6 +44,20 @@ describe(`${DotNotationParser.name} > pathCache integration`, () => { const parser = new DotNotationParser(new SecurityGuard(), new SecurityParser()); expect(parser.get({ a: { b: 1 } }, 'a.b')).toBe(1); }); + + it('shares one cache entry across equivalent root-prefixed paths', () => { + const cache = new FakePathCache(); + const parser = new DotNotationParser(new SecurityGuard(), new SecurityParser(), cache); + const data = { a: { b: 1 } }; + + parser.get(data, 'a.b'); + parser.get(data, '$.a.b'); + parser.get(data, '$a.b'); + + // All three normalize to the key 'a.b': parsed once, one cache entry. + expect(cache.setCallCount).toBe(1); + expect([...cache.store.keys()]).toEqual(['a.b']); + }); }); // Additional branch-coverage tests (targeting Stryker survivors) diff --git a/packages/js/tests/parity.test.ts b/packages/js/tests/parity.test.ts index e12d975..85bdee7 100644 --- a/packages/js/tests/parity.test.ts +++ b/packages/js/tests/parity.test.ts @@ -262,6 +262,37 @@ describe(`${Inline.name} > PathQuery > filter (parity)`, () => { }); expect(accessor.get("items[?contains(@.tag, 'world')].tag")).toEqual(['hello-world']); }); + + it('matches integer data with a scientific-notation literal (1e3 == 1000)', () => { + const accessor = Inline.fromArray({ items: [{ v: 1000 }, { v: 1 }] }); + expect(accessor.get('items[?v == 1e3].v')).toEqual([1000]); + }); + + it('treats a hex literal as a string, matching only string data (0x1A)', () => { + const accessor = Inline.fromArray({ items: [{ v: '0x1A' }, { v: 26 }] }); + expect(accessor.get('items[?v == 0x1A].v')).toEqual(['0x1A']); + }); + + it('excludes a non-numeric string from a > comparison', () => { + const accessor = Inline.fromArray({ items: [{ v: 'abc' }, { v: 10 }] }); + expect(accessor.get('items[?v > 5].v')).toEqual([10]); + }); + + it('excludes a numeric string from a > comparison', () => { + const accessor = Inline.fromArray({ items: [{ v: '10' }, { v: 20 }] }); + expect(accessor.get('items[?v > 5].v')).toEqual([20]); + }); +}); + +describe(`${Inline.name} > PathQuery > YAML flow map (parity)`, () => { + it('splits a quoted flow-map key on the first colon outside quotes', () => { + const accessor = Inline.fromYaml('data: {"key:with:colons": value}'); + expect(accessor.get('data')).toEqual({ 'key:with:colons': 'value' }); + }); + + it('rejects a merge key used inside a flow map', () => { + expect(() => Inline.fromYaml('data: {<<: {a: 1}, b: 2}')).toThrow(); + }); }); describe(`${Inline.name} > PathQuery > multi-key and multi-index (parity)`, () => { diff --git a/packages/js/tests/parser/yaml-parser.test.ts b/packages/js/tests/parser/yaml-parser.test.ts index e173611..8b5ba22 100644 --- a/packages/js/tests/parser/yaml-parser.test.ts +++ b/packages/js/tests/parser/yaml-parser.test.ts @@ -145,6 +145,53 @@ describe(`${YamlParser.name} > inline flow`, () => { const result = makeParser().parse(yaml); expect(result['tags']).toBe('[a b c'); }); + + it('splits on the first colon outside quotes for a double-quoted flow-map key', () => { + // findFlowColon: a colon inside a quoted key must not split the pair + const yaml = 'data: {"key:with:colons": value}'; + const result = makeParser().parse(yaml); + expect(result['data']).toEqual({ 'key:with:colons': 'value' }); + }); + + it('unquotes a single-quoted flow-map key containing a colon', () => { + const yaml = "data: {'a:b': v}"; + const result = makeParser().parse(yaml); + expect(result['data']).toEqual({ 'a:b': 'v' }); + }); + + it('keeps a flow-map value containing a colon (URL) intact', () => { + // Regression: the first colon (after url) is outside quotes + const yaml = 'm: {url: http://x}'; + const result = makeParser().parse(yaml); + expect(result['m']).toEqual({ url: 'http://x' }); + }); + + it('treats a leading colon in a flow-map item as an empty key', () => { + // findFlowColon returns 0; the empty key is kept (colonPos < 0 guard) + const yaml = 'd: {: value}'; + const result = makeParser().parse(yaml); + expect(result['d']).toEqual({ '': 'value' }); + }); + + it('unquotes an empty double-quoted flow-map key', () => { + // unquoteKey strips a 2-char quoted key ("") to an empty string + const yaml = 'd: {"": v}'; + const result = makeParser().parse(yaml); + expect(result['d']).toEqual({ '': 'v' }); + }); + + it('unescapes doubled single quotes inside a single-quoted flow-map key', () => { + const yaml = "d: {'a''b': v}"; + const result = makeParser().parse(yaml); + expect(result['d']).toEqual({ "a'b": 'v' }); + }); + + it('ignores a flow-map item whose only colon is inside an unterminated quote', () => { + // The colon sits inside the quoted region, so no separator is found + const yaml = 'd: {"ab: v}'; + const result = makeParser().parse(yaml); + expect(result['d']).toEqual({}); + }); }); describe(`${YamlParser.name} > security - unsafe constructs`, () => { @@ -232,6 +279,18 @@ describe(`${YamlParser.name} > security - unsafe constructs`, () => { expect(() => makeParser().parse('note: use <<: syntax')).not.toThrow(); }); + it('throws YamlParseException for a merge key used as a flow-map key', () => { + expect(() => makeParser().parse('data: {<<: {a: 1}, b: 2}')).toThrow(/merge key/i); + }); + + it('throws YamlParseException for a merge key as a later flow-map key', () => { + expect(() => makeParser().parse('data: {a: 1, <<: {b: 2}}')).toThrow(/merge key/i); + }); + + it('does not throw for an ordinary flow map without merge keys', () => { + expect(() => makeParser().parse('data: {a: 1, b: 2}')).not.toThrow(); + }); + it('does not throw for ! inside double-quoted string', () => { // quoted string - regex should not match expect(() => makeParser().parse('msg: "hello world"')).not.toThrow(); diff --git a/packages/js/tests/path-query/segment-filter-parser.test.ts b/packages/js/tests/path-query/segment-filter-parser.test.ts index d8c61ec..d95b907 100644 --- a/packages/js/tests/path-query/segment-filter-parser.test.ts +++ b/packages/js/tests/path-query/segment-filter-parser.test.ts @@ -321,6 +321,21 @@ describe(SegmentFilterParser.name, () => { expect(parser.evaluate({ qty: 5 }, expr)).toBe(false); }); + it('evaluates false when an arithmetic operand resolves to a non-numeric type', () => { + // @.tags is an array (neither number nor numeric string) → not coercible + const parser = new SegmentFilterParser(new SecurityGuard()); + const expr = parser.parse('@.tags * @.qty > 0'); + + expect(parser.evaluate({ tags: [1, 2], qty: 5 }, expr)).toBe(false); + }); + + it('evaluates false when an arithmetic operand resolves to a boolean', () => { + const parser = new SegmentFilterParser(new SecurityGuard()); + const expr = parser.parse('@.flag + @.qty > 0'); + + expect(parser.evaluate({ flag: true, qty: 5 }, expr)).toBe(false); + }); + it('evaluates arithmetic addition between two field values', () => { const parser = new SegmentFilterParser(new SecurityGuard()); const expr = parser.parse('@.a + @.b > 5'); @@ -498,6 +513,137 @@ describe(SegmentFilterParser.name, () => { }); }); + describe(`${SegmentFilterParser.name} > numeric literal coercion`, () => { + const value = (expr: string): unknown => + new SegmentFilterParser(new SecurityGuard()).parse(expr).conditions[0].value; + + it('parses scientific notation as a number (1e3 -> 1000)', () => { + expect(value('v==1e3')).toBe(1000); + }); + + it('parses integral scientific notation as an integer-equal number (1.5e2 -> 150)', () => { + expect(value('v==1.5e2')).toBe(150); + }); + + it('keeps a fractional value as a float (2.5)', () => { + expect(value('v==2.5')).toBe(2.5); + }); + + it('keeps a negative-exponent value fractional (1e-3 -> 0.001)', () => { + expect(value('v==1e-3')).toBe(0.001); + }); + + it('preserves a leading-zero decimal integer (007 -> 7)', () => { + expect(value('v==007')).toBe(7); + }); + + it('leaves a hexadecimal literal as a string (0x1A)', () => { + expect(value('v==0x1A')).toBe('0x1A'); + }); + + it('leaves a binary literal as a string (0b101)', () => { + expect(value('v==0b101')).toBe('0b101'); + }); + + it('leaves an octal literal as a string (0o17)', () => { + expect(value('v==0o17')).toBe('0o17'); + }); + + it('leaves an underscore-grouped literal as a string (1_000)', () => { + expect(value('v==1_000')).toBe('1_000'); + }); + + it('parses a leading-plus integer (+5 -> 5)', () => { + expect(value('v==+5')).toBe(5); + }); + + it('parses a leading-minus integer (-10 -> -10)', () => { + expect(value('v==-10')).toBe(-10); + }); + + it('parses a leading-plus float (+1.5 -> 1.5)', () => { + expect(value('v==+1.5')).toBe(1.5); + }); + + it('parses an uppercase-exponent literal (1E3 -> 1000)', () => { + expect(value('v==1E3')).toBe(1000); + }); + + it('parses a multi-digit fraction (12.25 -> 12.25)', () => { + expect(value('v==12.25')).toBe(12.25); + }); + + it('parses a leading-dot multi-digit fraction (.25 -> 0.25)', () => { + expect(value('v==.25')).toBe(0.25); + }); + + it('treats a lone plus sign as a string (+)', () => { + expect(value('v==+')).toBe('+'); + }); + + it('treats a non-numeric word as a string (abc)', () => { + expect(value('v==abc')).toBe('abc'); + }); + + it('matches integer data with a scientific-notation literal', () => { + const parser = new SegmentFilterParser(new SecurityGuard()); + expect(parser.evaluate({ v: 1000 }, parser.parse('v==1e3'))).toBe(true); + }); + + it('uses the same numeric rule inside arithmetic predicates', () => { + const parser = new SegmentFilterParser(new SecurityGuard()); + expect(parser.evaluate({ v: 1000 }, parser.parse('v*1==1e3'))).toBe(true); + }); + }); + + describe(`${SegmentFilterParser.name} > relational operators require numbers`, () => { + const evaluate = (expr: string, item: Record): boolean => { + const parser = new SegmentFilterParser(new SecurityGuard()); + return parser.evaluate(item, parser.parse(expr)); + }; + + it('returns false comparing a non-numeric string with > to a number', () => { + expect(evaluate('v>5', { v: 'abc' })).toBe(false); + }); + + it('returns false comparing an empty string with >= to zero', () => { + expect(evaluate('v>=0', { v: '' })).toBe(false); + }); + + it('returns false comparing a numeric string with > to a number', () => { + expect(evaluate('v>5', { v: '10' })).toBe(false); + }); + + it('returns false comparing a number with > to a non-numeric expected value', () => { + // fieldValue is a number, expected ('abc') is a string → no comparison + expect(evaluate('v>abc', { v: 10 })).toBe(false); + }); + + it('returns false comparing a number with >= to a string expected value', () => { + expect(evaluate('v>=x', { v: 10 })).toBe(false); + }); + + it('returns false comparing null with > to a number', () => { + expect(evaluate('v>0', { v: null })).toBe(false); + }); + + it('returns false comparing an array with > to a number', () => { + expect(evaluate('v>1', { v: [1, 2] })).toBe(false); + }); + + it('still compares two numbers with > (regression)', () => { + expect(evaluate('age>18', { age: 30 })).toBe(true); + }); + + it('still compares two numbers with <= (regression)', () => { + expect(evaluate('age<=30', { age: 30 })).toBe(true); + }); + + it('still compares two floats with >= (regression)', () => { + expect(evaluate('s>=9.5', { s: 9.5 })).toBe(true); + }); + }); + describe(`${SegmentFilterParser.name} > mutation boundary tests`, () => { it('trims whitespace from tokens in splitLogical before parsing condition', () => { const parser = new SegmentFilterParser(new SecurityGuard()); diff --git a/packages/php/src/Core/DotNotationParser.php b/packages/php/src/Core/DotNotationParser.php index 0903b2a..ba485ce 100644 --- a/packages/php/src/Core/DotNotationParser.php +++ b/packages/php/src/Core/DotNotationParser.php @@ -201,17 +201,42 @@ public function getMaxKeys(): int */ private function segmentPathCache(string $path): array { - $cached = $this->pathCache->get($path); + $cacheKey = $this->normalizeCacheKey($path); + + $cached = $this->pathCache->get($cacheKey); if ($cached !== null) { return $cached; } $segments = $this->segmentParser->parseSegments($path); - $this->pathCache->set($path, $segments); + $this->pathCache->set($cacheKey, $segments); return $segments; } + /** + * Normalize a path to its cache key by stripping the optional root prefix. + * + * The segment parser ignores a leading `$` (and the `.` that may follow), + * so `$.a.b`, `$a.b`, and `a.b` parse identically. Collapsing them to one + * cache key avoids storing duplicate entries for equivalent paths. + * + * @param string $path Dot-notation path string. + * + * @return string Normalized cache key. + */ + private function normalizeCacheKey(string $path): string + { + if (isset($path[0]) && $path[0] === '$') { + $path = substr($path, 1); + if (isset($path[0]) && $path[0] === '.') { + $path = substr($path, 1); + } + } + + return $path; + } + /** * Recursively write a value at the given key path. * diff --git a/packages/php/src/Parser/Yaml/YamlParser.php b/packages/php/src/Parser/Yaml/YamlParser.php index 2e26968..a7e9910 100644 --- a/packages/php/src/Parser/Yaml/YamlParser.php +++ b/packages/php/src/Parser/Yaml/YamlParser.php @@ -95,6 +95,16 @@ private function assertNoUnsafeConstructs(array $lines): void "YAML merge keys (<<) are not supported (line " . ($lineNum + 1) . ")." ); } + + // Also block merge keys used as flow-map keys ("{<<:" or ", <<:"), + // which the line-start check above would miss. The negative + // lookbehind keeps the match in genuine key position and out of + // adjacent quoted regions; rejection is fail-closed. + if (preg_match('/(? Parsed key-value pairs. @@ -547,11 +562,11 @@ private function parseFlowMap(string $value): array $items = $this->splitFlowItems($inner); foreach ($items as $item) { $item = trim($item); - $colonPos = strpos($item, ':'); - if ($colonPos === false) { + $colonPos = $this->findFlowColon($item); + if ($colonPos < 0) { continue; } - $key = trim(substr($item, 0, $colonPos)); + $key = $this->unquoteKey(trim(substr($item, 0, $colonPos))); $val = trim(substr($item, $colonPos + 1)); $result[$key] = $this->castScalar($val); } @@ -559,6 +574,70 @@ private function parseFlowMap(string $value): array return $result; } + /** + * Find the first colon outside of quoted regions in a flow-map item. + * + * A naive search would split on a colon inside a quoted key + * (e.g. {"a:b": v}), corrupting both key and value. + * + * @param string $item Single flow-map item (key/value pair). + * + * @return int Index of the separating colon, or -1 if none is found. + */ + private function findFlowColon(string $item): int + { + $inQuote = false; + $quoteChar = ''; + $len = strlen($item); + + for ($i = 0; $i < $len; $i++) { + $ch = $item[$i]; + + if ($inQuote) { + if ($ch === $quoteChar) { + $inQuote = false; + } + continue; + } + + if ($ch === '"' || $ch === "'") { + $inQuote = true; + $quoteChar = $ch; + continue; + } + + if ($ch === ':') { + return $i; + } + } + + return -1; + } + + /** + * Strip a single matching pair of surrounding quotes from a flow-map key. + * + * Keys are always strings, so no scalar casting is applied; only the + * outer quotes are removed (and doubled single-quotes unescaped). + * + * @param string $key Raw flow-map key, possibly quoted. + * + * @return string Unquoted key. + */ + private function unquoteKey(string $key): string + { + if (strlen($key) >= 2) { + if ($key[0] === '"' && $key[strlen($key) - 1] === '"') { + return $this->unescapeDoubleQuoted(substr($key, 1, -1)); + } + if ($key[0] === "'" && $key[strlen($key) - 1] === "'") { + return str_replace("''", "'", substr($key, 1, -1)); + } + } + + return $key; + } + /** * Split flow-syntax items by comma, respecting nested brackets and quotes. * diff --git a/packages/php/src/PathQuery/SegmentFilterParser.php b/packages/php/src/PathQuery/SegmentFilterParser.php index 71d7552..a0dae46 100644 --- a/packages/php/src/PathQuery/SegmentFilterParser.php +++ b/packages/php/src/PathQuery/SegmentFilterParser.php @@ -239,11 +239,50 @@ private function parseValueDefault(string $raw): int|float|string return substr($raw, 1, -1); } - if (is_numeric($raw)) { - return str_contains($raw, '.') ? (float) $raw : (int) $raw; + return $this->numericLiteral($raw) ?? $raw; + } + + /** + * Coerce a raw token to a number using a runtime-agnostic rule. + * + * Only plain decimal integers and decimal floats (including scientific + * notation) are treated as numbers. Hex (`0x`), binary (`0b`), octal + * (`0o`), and underscore-grouped literals are intentionally left as + * strings so PHP and JS produce identical results for untrusted input. + * + * A value that is mathematically an integer (e.g. `1e3`, `1.5e2`) is + * returned as an int so it compares equal to integer data under strict + * equality; only genuinely fractional values (e.g. `2.5`, `1e-3`) stay + * floats. This keeps PHP (int/float distinct) and JS (single number + * type) identical under `==`. + * + * @param string $raw Raw token. + * + * @return int|float|null The number, or null when the token is not numeric. + */ + private function numericLiteral(string $raw): int|float|null + { + // Plain decimal integer (handled first so the float branch below + // always carries a dot or exponent). + if (preg_match('/^[+-]?\d+$/', $raw) === 1) { + return (int) $raw; + } + + // Decimal float (with a dot, optional exponent) or an integer mantissa + // with a mandatory exponent (e.g. 1e3). Hex/binary/octal never match. + if (preg_match('/^[+-]?(?:(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)$/', $raw) === 1) { + $float = (float) $raw; + + // Collapse integral values (1e3 → 1000) to int so they match + // integer data; keep fractional values as float. + if (is_finite($float) && floor($float) === $float && abs($float) < (float) PHP_INT_MAX) { + return (int) $float; + } + + return $float; } - return $raw; + return null; } /** @@ -264,13 +303,19 @@ private function evaluateCondition(array $item, array $condition): bool $expected = $condition['value']; + // Relational operators only compare two numbers. Any other type + // combination yields false, keeping PHP and JS identical (PHP's + // native mixed-type comparison and JS coercion diverge otherwise). + $bothNumbers = (is_int($fieldValue) || is_float($fieldValue)) + && (is_int($expected) || is_float($expected)); + return match ($condition['operator']) { '==' => $fieldValue === $expected, '!=' => $fieldValue !== $expected, - '>' => $fieldValue > $expected, - '<' => $fieldValue < $expected, - '>=' => $fieldValue >= $expected, - '<=' => $fieldValue <= $expected, + '>' => $bothNumbers && $fieldValue > $expected, + '<' => $bothNumbers && $fieldValue < $expected, + '>=' => $bothNumbers && $fieldValue >= $expected, + '<=' => $bothNumbers && $fieldValue <= $expected, default => false, }; } @@ -373,8 +418,11 @@ private function resolveArithmetic(array $item, string $expr): float|int|null } $toNumber = function (string $token) use ($item): float|int|null { - if (is_numeric($token) && !str_starts_with($token, '@')) { - return str_contains($token, '.') ? (float) $token : (int) $token; + if (!str_starts_with($token, '@')) { + $literal = $this->numericLiteral($token); + if ($literal !== null) { + return $literal; + } } $val = $this->resolveFilterArg($item, $token); @@ -383,8 +431,8 @@ private function resolveArithmetic(array $item, string $expr): float|int|null return $val; } - if (is_numeric($val)) { - return str_contains((string) $val, '.') ? (float) $val : (int) $val; + if (is_string($val)) { + return $this->numericLiteral($val); } return null; diff --git a/packages/php/tests/Unit/Core/DotNotationParserTest.php b/packages/php/tests/Unit/Core/DotNotationParserTest.php index 66743f7..7271833 100644 --- a/packages/php/tests/Unit/Core/DotNotationParserTest.php +++ b/packages/php/tests/Unit/Core/DotNotationParserTest.php @@ -72,6 +72,18 @@ expect($this->cache->setCallCount)->toBe(1); }); + + it('shares one cache entry across equivalent root-prefixed paths', function (): void { + $data = ['a' => ['b' => 1]]; + + $this->parser->get($data, 'a.b'); + $this->parser->get($data, '$.a.b'); + $this->parser->get($data, '$a.b'); + + // All three normalize to the key 'a.b': parsed once, one cache entry. + expect($this->cache->setCallCount)->toBe(1); + expect(array_keys($this->cache->store))->toBe(['a.b']); + }); }); // has() diff --git a/packages/php/tests/Unit/ParityTest.php b/packages/php/tests/Unit/ParityTest.php index 5687313..5422370 100644 --- a/packages/php/tests/Unit/ParityTest.php +++ b/packages/php/tests/Unit/ParityTest.php @@ -250,6 +250,42 @@ ]); expect($accessor->get("items[?contains(@.tag, 'world')].tag"))->toBe(['hello-world']); }); + + it('matches integer data with a scientific-notation literal (1e3 == 1000)', function (): void { + $accessor = $this->inline->fromArray(['items' => [['v' => 1000], ['v' => 1]]]); + expect($accessor->get('items[?v == 1e3].v'))->toBe([1000]); + }); + + it('treats a hex literal as a string, matching only string data (0x1A)', function (): void { + $accessor = $this->inline->fromArray(['items' => [['v' => '0x1A'], ['v' => 26]]]); + expect($accessor->get('items[?v == 0x1A].v'))->toBe(['0x1A']); + }); + + it('excludes a non-numeric string from a > comparison', function (): void { + $accessor = $this->inline->fromArray(['items' => [['v' => 'abc'], ['v' => 10]]]); + expect($accessor->get('items[?v > 5].v'))->toBe([10]); + }); + + it('excludes a numeric string from a > comparison', function (): void { + $accessor = $this->inline->fromArray(['items' => [['v' => '10'], ['v' => 20]]]); + expect($accessor->get('items[?v > 5].v'))->toBe([20]); + }); +}); + +describe(Inline::class . ' > PathQuery > YAML flow map (parity)', function (): void { + beforeEach(function (): void { + $this->inline = new Inline(); + }); + + it('splits a quoted flow-map key on the first colon outside quotes', function (): void { + $accessor = $this->inline->fromYaml('data: {"key:with:colons": value}'); + expect($accessor->get('data'))->toBe(['key:with:colons' => 'value']); + }); + + it('rejects a merge key used inside a flow map', function (): void { + expect(fn () => $this->inline->fromYaml('data: {<<: {a: 1}, b: 2}')) + ->toThrow(\SafeAccess\Inline\Exceptions\YamlParseException::class); + }); }); describe(Inline::class . ' > PathQuery > multi-key and multi-index (parity)', function (): void { diff --git a/packages/php/tests/Unit/Parser/Yaml/YamlParserEdgeCasesTest.php b/packages/php/tests/Unit/Parser/Yaml/YamlParserEdgeCasesTest.php index 0c2f90b..7a1bd5f 100644 --- a/packages/php/tests/Unit/Parser/Yaml/YamlParserEdgeCasesTest.php +++ b/packages/php/tests/Unit/Parser/Yaml/YamlParserEdgeCasesTest.php @@ -102,6 +102,36 @@ expect(fn () => $this->parser->parse($yaml)) ->toThrow(YamlParseException::class); }); + + it('throws YamlParseException for a merge key used as a flow-map key', function (): void { + $yaml = "data: {<<: {a: 1}, b: 2}"; + + expect(fn () => $this->parser->parse($yaml)) + ->toThrow(YamlParseException::class, 'merge keys (<<) are not supported'); + }); + + it('throws YamlParseException for a merge key as a later flow-map key', function (): void { + $yaml = "data: {a: 1, <<: {b: 2}}"; + + expect(fn () => $this->parser->parse($yaml)) + ->toThrow(YamlParseException::class, 'merge keys (<<) are not supported'); + }); + + it('does not throw for the "<<" text inside a quoted string value', function (): void { + $yaml = "note: use <<: syntax"; + + $result = $this->parser->parse($yaml); + + expect($result)->toBe(['note' => 'use <<: syntax']); + }); + + it('does not treat a flow-map key containing a single "<" as a merge key', function (): void { + $yaml = "expr: {a: 1, b: 2}"; + + $result = $this->parser->parse($yaml); + + expect($result['expr'])->toBe(['a' => 1, 'b' => 2]); + }); }); // parse() - coverage-gap scenarios @@ -297,6 +327,67 @@ expect($result['meta']['other'])->toBe(2); }); + it('splits on the first colon outside quotes for a double-quoted flow-map key', function (): void { + // findFlowColon: a colon inside a quoted key must not split the pair + $yaml = "data: {\"key:with:colons\": value}"; + + $result = $this->parser->parse($yaml); + + expect($result['data'])->toBe(['key:with:colons' => 'value']); + }); + + it('unquotes a single-quoted flow-map key containing a colon', function (): void { + $yaml = "data: {'a:b': v}"; + + $result = $this->parser->parse($yaml); + + expect($result['data'])->toBe(['a:b' => 'v']); + }); + + it('keeps a flow-map value containing a colon (URL) intact', function (): void { + // Regression: the first colon (after url) is outside quotes + $yaml = "m: {url: http://x}"; + + $result = $this->parser->parse($yaml); + + expect($result['m'])->toBe(['url' => 'http://x']); + }); + + it('treats a leading colon in a flow-map item as an empty key', function (): void { + // findFlowColon returns 0; the empty key is kept (colonPos < 0 guard) + $yaml = "d: {: value}"; + + $result = $this->parser->parse($yaml); + + expect($result['d'])->toBe(['' => 'value']); + }); + + it('unquotes an empty double-quoted flow-map key', function (): void { + // unquoteKey strips a 2-char quoted key ("") to an empty string + $yaml = "d: {\"\": v}"; + + $result = $this->parser->parse($yaml); + + expect($result['d'])->toBe(['' => 'v']); + }); + + it('unescapes doubled single quotes inside a single-quoted flow-map key', function (): void { + $yaml = "d: {'a''b': v}"; + + $result = $this->parser->parse($yaml); + + expect($result['d'])->toBe(["a'b" => 'v']); + }); + + it('ignores a flow-map item whose only colon is inside an unterminated quote', function (): void { + // The colon sits inside the quoted region, so no separator is found + $yaml = "d: {\"ab: v}"; + + $result = $this->parser->parse($yaml); + + expect($result['d'])->toBe([]); + }); + it('handles quoted strings inside a flow sequence', function (): void { // Lines 556-567: splitFlowItems - quoted string tracking $yaml = 'items: [a, "b,c", d]'; diff --git a/packages/php/tests/Unit/PathQuery/SegmentFilterParserTest.php b/packages/php/tests/Unit/PathQuery/SegmentFilterParserTest.php index 4c98315..eb32715 100644 --- a/packages/php/tests/Unit/PathQuery/SegmentFilterParserTest.php +++ b/packages/php/tests/Unit/PathQuery/SegmentFilterParserTest.php @@ -374,4 +374,96 @@ expect($parser->evaluate(['other' => 'data'], $expr))->toBeFalse(); }); }); + + describe(SegmentFilterParser::class . ' > numeric literal coercion', function (): void { + $value = function (string $expr): mixed { + $parser = new SegmentFilterParser(new SecurityGuard()); + return $parser->parse($expr)['conditions'][0]['value']; + }; + + it('parses scientific notation as a number (1e3 -> 1000)', function () use ($value): void { + expect($value('v==1e3'))->toBe(1000); + }); + + it('parses integral scientific notation as an integer (1.5e2 -> 150)', function () use ($value): void { + expect($value('v==1.5e2'))->toBe(150); + }); + + it('keeps a fractional value as a float (2.5)', function () use ($value): void { + expect($value('v==2.5'))->toBe(2.5); + }); + + it('keeps a negative-exponent value fractional (1e-3 -> 0.001)', function () use ($value): void { + expect($value('v==1e-3'))->toBe(0.001); + }); + + it('preserves a leading-zero decimal integer (007 -> 7)', function () use ($value): void { + expect($value('v==007'))->toBe(7); + }); + + it('leaves a hexadecimal literal as a string (0x1A)', function () use ($value): void { + expect($value('v==0x1A'))->toBe('0x1A'); + }); + + it('leaves a binary literal as a string (0b101)', function () use ($value): void { + expect($value('v==0b101'))->toBe('0b101'); + }); + + it('leaves an octal literal as a string (0o17)', function () use ($value): void { + expect($value('v==0o17'))->toBe('0o17'); + }); + + it('leaves an underscore-grouped literal as a string (1_000)', function () use ($value): void { + expect($value('v==1_000'))->toBe('1_000'); + }); + + it('matches integer data with a scientific-notation literal', function (): void { + $parser = new SegmentFilterParser(new SecurityGuard()); + expect($parser->evaluate(['v' => 1000], $parser->parse('v==1e3')))->toBeTrue(); + }); + + it('uses the same numeric rule inside arithmetic predicates', function (): void { + $parser = new SegmentFilterParser(new SecurityGuard()); + expect($parser->evaluate(['v' => 1000], $parser->parse('v*1==1e3')))->toBeTrue(); + }); + }); + + describe(SegmentFilterParser::class . ' > relational operators require numbers', function (): void { + $evaluate = function (string $expr, array $item): bool { + $parser = new SegmentFilterParser(new SecurityGuard()); + return $parser->evaluate($item, $parser->parse($expr)); + }; + + it('returns false comparing a non-numeric string with > to a number', function () use ($evaluate): void { + expect($evaluate('v>5', ['v' => 'abc']))->toBeFalse(); + }); + + it('returns false comparing an empty string with >= to zero', function () use ($evaluate): void { + expect($evaluate('v>=0', ['v' => '']))->toBeFalse(); + }); + + it('returns false comparing a numeric string with > to a number', function () use ($evaluate): void { + expect($evaluate('v>5', ['v' => '10']))->toBeFalse(); + }); + + it('returns false comparing null with > to a number', function () use ($evaluate): void { + expect($evaluate('v>0', ['v' => null]))->toBeFalse(); + }); + + it('returns false comparing an array with > to a number', function () use ($evaluate): void { + expect($evaluate('v>1', ['v' => [1, 2]]))->toBeFalse(); + }); + + it('still compares two numbers with > (regression)', function () use ($evaluate): void { + expect($evaluate('age>18', ['age' => 30]))->toBeTrue(); + }); + + it('still compares two numbers with <= (regression)', function () use ($evaluate): void { + expect($evaluate('age<=30', ['age' => 30]))->toBeTrue(); + }); + + it('still compares two floats with >= (regression)', function () use ($evaluate): void { + expect($evaluate('s>=9.5', ['s' => 9.5]))->toBeTrue(); + }); + }); });