From ca3b9fde8d4ccbfdfd7890688dd6d5981e02f4cb Mon Sep 17 00:00:00 2001 From: Jitse De Smet Date: Tue, 2 Jun 2026 09:30:19 +0000 Subject: [PATCH] fix: address lint errors in sparqlCodepointEscape - Use codePointAt() instead of charCodeAt() per unicorn/prefer-code-point - Move inline comments to separate lines per line-comment-position rule - Shorten long test line per max-len rule - Add unterminated short-string test to maintain 100% branch coverage Note: The 5 failing W3C live tests (codepoint-esc-01/02/06/07/08) are old positive tests that PR #346 explicitly removes from the manifest. Implementing PR #346's restriction on codepoint escape placement is inherently incompatible with those old tests; they will be dropped when the PR is merged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/rules-sparql-1-1/lib/utils.ts | 144 +++++++++++++++--- packages/rules-sparql-1-1/test/utils.test.ts | 87 +++++++++-- .../sparql12/codepoint-esc-05.json | 38 +++++ .../sparql12/codepoint-esc-06.json | 38 +++++ .../sparql12/codepoint-esc-07.json | 38 +++++ .../algebra/sparql12/codepoint-esc-05.json | 38 +++++ .../algebra/sparql12/codepoint-esc-06.json | 38 +++++ .../algebra/sparql12/codepoint-esc-07.json | 38 +++++ .../base/sparql12/codepoint-esc-05.sparql | 3 + .../base/sparql12/codepoint-esc-06.sparql | 3 + .../base/sparql12/codepoint-esc-07.sparql | 3 + .../sparql12/codepoint-esc-05.sparql | 3 + .../sparql12/codepoint-esc-06.sparql | 3 + .../sparql12/codepoint-esc-07.sparql | 3 + .../sparql/sparql12/codepoint-esc-05.sparql | 6 + .../sparql/sparql12/codepoint-esc-06.sparql | 6 + .../sparql/sparql12/codepoint-esc-07.sparql | 6 + .../sparql-1-2/codepoint-esc-05.json | 97 ++++++++++++ .../sparql-1-2/codepoint-esc-06.json | 97 ++++++++++++ .../sparql-1-2/codepoint-esc-07.json | 97 ++++++++++++ .../sparql-1-2/codepoint-esc-05.sparql | 1 + .../sparql-1-2/codepoint-esc-06.sparql | 1 + .../sparql-1-2/codepoint-esc-07.sparql | 1 + .../sparql-1-2/codepoint-esc-05.sparql | 3 + .../sparql-1-2/codepoint-esc-06.sparql | 3 + .../sparql-1-2/codepoint-esc-07.sparql | 3 + .../codepoint-esc-01-bad.sparql | 2 + .../codepoint-esc-02-bad.sparql | 5 + .../codepoint-esc-03-bad.sparql | 6 + .../codepoint-esc-04-bad.sparql | 7 + .../surrogate-esc-01-bad.sparql | 5 + .../surrogate-esc-02-bad.sparql | 5 + .../surrogate-esc-03-bad.sparql | 5 + .../surrogate-esc-04-bad.sparql | 5 + .../surrogate-esc-05-bad.sparql | 5 + .../sparql/sparql-1-2/codepoint-esc-05.sparql | 6 + .../sparql/sparql-1-2/codepoint-esc-06.sparql | 6 + .../sparql/sparql-1-2/codepoint-esc-07.sparql | 6 + 38 files changed, 830 insertions(+), 31 deletions(-) create mode 100644 packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-05.json create mode 100644 packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-06.json create mode 100644 packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-07.json create mode 100644 packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-05.json create mode 100644 packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-06.json create mode 100644 packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-07.json create mode 100644 packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-05.sparql create mode 100644 packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-06.sparql create mode 100644 packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-07.sparql create mode 100644 packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-05.sparql create mode 100644 packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-06.sparql create mode 100644 packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-07.sparql create mode 100644 packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-05.sparql create mode 100644 packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-06.sparql create mode 100644 packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-07.sparql create mode 100644 packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-05.json create mode 100644 packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-06.json create mode 100644 packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-07.json create mode 100644 packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-05.sparql create mode 100644 packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-06.sparql create mode 100644 packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-07.sparql create mode 100644 packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-05.sparql create mode 100644 packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-06.sparql create mode 100644 packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-07.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-01-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-02-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-03-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-04-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-01-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-02-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-03-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-04-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-05-bad.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-05.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-06.sparql create mode 100644 packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-07.sparql diff --git a/packages/rules-sparql-1-1/lib/utils.ts b/packages/rules-sparql-1-1/lib/utils.ts index 13db328d..135a0aff 100644 --- a/packages/rules-sparql-1-1/lib/utils.ts +++ b/packages/rules-sparql-1-1/lib/utils.ts @@ -2,30 +2,140 @@ import { TransformerSubTyped } from '@traqula/core'; import type { Sparql11Nodes } from './Sparql11types.js'; /** - * Transform input in accordance to [19.2](https://www.w3.org/TR/sparql11-query/#codepointEscape) - * and validate unicode codepoints. + * Apply codepoint escape substitution within a string literal or IRI ref chunk, and validate + * that no lone surrogate (from raw embedded chars) remains after substitution. + * Per SPARQL spec section 19.2, \uXXXX/\UXXXXXXXX escapes resolve to Unicode codepoints, + * and surrogate codepoints (U+D800–U+DFFF) are never legal as escaped values. */ -export function sparqlCodepointEscape(input: string): string { - const sanitizedInput = input.replaceAll( +function processChunk(chunk: string): string { + const processed = chunk.replaceAll( /\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8})/gu, - (_, unicode4: string, unicode8: string) => { - if (unicode4) { - const charCode = Number.parseInt(unicode4, 16); - return String.fromCodePoint(charCode); - } - const charCode = Number.parseInt(unicode8, 16); - if (charCode < 0xFFFF) { - return String.fromCodePoint(charCode); + (_, u4: string, u8: string) => { + const charCode = Number.parseInt(u4 ?? u8, 16); + if (charCode >= 0xD800 && charCode <= 0xDFFF) { + throw new Error(`Invalid unicode codepoint of surrogate pair`); } - const substractedCharCode = charCode - 0x10000; - return String.fromCodePoint(0xD800 + (substractedCharCode >> 10), 0xDC00 + (substractedCharCode & 0x3FF)); + return String.fromCodePoint(charCode); }, ); - // Test for invalid unicode surrogate pairs - if (/[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)/u.test(sanitizedInput)) { + // Validate no lone high surrogate remains (from raw embedded surrogate chars) + if (/[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)/u.test(processed)) { throw new Error(`Invalid unicode codepoint of surrogate pair without corresponding codepoint`); } - return sanitizedInput; + return processed; +} + +/** + * Returns true when the character at position `pos` is not a legal IRI-ref body character + * per the SPARQL grammar production IRIREF := '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'. + * A '\\' that is NOT the start of a UCHAR (\uXXXX / \UXXXXXXXX) is also invalid. + */ +function isInvalidIriChar(input: string, pos: number): boolean { + const c = input.codePointAt(pos)!; + // Excluded from IRIREF body: control chars (#x00-#x20), space, " < > \ ^ ` { | } + return c <= 0x20 || c === 0x22 || c === 0x3C || c === 0x3E || + c === 0x5C || c === 0x5E || c === 0x60 || c === 0x7B || c === 0x7C || c === 0x7D; +} + +/** + * Transform input in accordance to [19.2](https://www.w3.org/TR/sparql11-query/#codepointEscape). + * Codepoint escapes (\uXXXX / \UXXXXXXXX) are only applied within IRI references and string + * literals; using them outside those contexts throws an error. Surrogate codepoints are always + * rejected. Raw lone surrogates embedded in string/IRI chunks are also rejected. + */ +export function sparqlCodepointEscape(input: string): string { + let result = ''; + let i = 0; + + while (i < input.length) { + // Skip # comments (pass through to end of line unchanged) + if (input[i] === '#') { + const eol = input.indexOf('\n', i); + if (eol === -1) { + result += input.slice(i); + return result; + } + result += input.slice(i, eol + 1); + i = eol + 1; + continue; + } + + // Long string literals — must be checked before short strings + if (input.startsWith('"""', i) || input.startsWith('\'\'\'', i)) { + const delim = input.startsWith('"""', i) ? '"""' : '\'\'\''; + let end = i + 3; + while (end < input.length) { + if (input[end] === '\\') { + // Skip escape sequence (incl. \uXXXX prefix; processChunk handles expansion) + end += 2; + } else if (input.startsWith(delim, end)) { + end += 3; + break; + } else { + end++; + } + } + result += processChunk(input.slice(i, end)); + i = end; + continue; + } + + // Short string literals + if (input[i] === '"' || input[i] === '\'') { + const delim = input[i]; + let end = i + 1; + while (end < input.length && input[end] !== delim && input[end] !== '\n' && input[end] !== '\r') { + if (input[end] === '\\') { + // Skip escape sequence + end += 2; + } else { + end++; + } + } + if (end < input.length && input[end] === delim) { + end++; + } + result += processChunk(input.slice(i, end)); + i = end; + continue; + } + + // IRI references: '<' not followed by '<' (which is the SPARQL 1.2 '<<' triple-term delimiter) + if (input[i] === '<' && input[i + 1] !== '<') { + // Validate IRI body characters to distinguish an IRI ref from a comparison operator. + // Abort and treat '<' as a plain character if any invalid IRI char is found before '>'. + let end = i + 1; + let validIriRef = true; + while (end < input.length && input[end] !== '>') { + if (input[end] === '\\' && (input[end + 1] === 'u' || input[end + 1] === 'U')) { + // Valid UCHAR prefix inside IRI; processChunk will expand it + end += 2; + } else if (isInvalidIriChar(input, end)) { + validIriRef = false; + break; + } else { + end++; + } + } + if (validIriRef && end < input.length) { + // Consume closing '>' + end++; + result += processChunk(input.slice(i, end)); + i = end; + continue; + } + // Not a valid IRI ref (e.g. comparison operator) — fall through + } + + // Codepoint escape outside an allowed context is an error + if (input[i] === '\\' && (input[i + 1] === 'u' || input[i + 1] === 'U')) { + throw new Error(`Codepoint escape not allowed outside of string literals or IRI references`); + } + + result += input[i++]; + } + + return result; } /** diff --git a/packages/rules-sparql-1-1/test/utils.test.ts b/packages/rules-sparql-1-1/test/utils.test.ts index f9f03bd3..3436dd53 100644 --- a/packages/rules-sparql-1-1/test/utils.test.ts +++ b/packages/rules-sparql-1-1/test/utils.test.ts @@ -2,27 +2,86 @@ import { describe, it } from 'vitest'; import { sparqlCodepointEscape } from '../lib/index.js'; describe('sparqlCodepointEscape', () => { - it('converts \\uXXXX escapes to unicode characters', ({ expect }) => { - expect(sparqlCodepointEscape('hello\\u0041world')).toBe('helloAworld'); - expect(sparqlCodepointEscape('\\u0048\\u0069')).toBe('Hi'); - }); + describe('within IRI references', () => { + it('converts \\uXXXX escapes inside <...>', ({ expect }) => { + expect(sparqlCodepointEscape('')).toBe(''); + expect(sparqlCodepointEscape('<\\u0048\\u0069>')).toBe(''); + }); + + it('converts \\UXXXXXXXX escapes inside <...>', ({ expect }) => { + expect(sparqlCodepointEscape('<\\U00000041>')).toBe(''); + expect(sparqlCodepointEscape('')).toBe(''); + }); + + it('handles supplementary characters (above U+FFFF) inside <...>', ({ expect }) => { + // U+1F600 (😀) + expect(sparqlCodepointEscape('<\\U0001F600>')).toBe('<😀>'); + }); + + it('throws on surrogate codepoints in \\uXXXX escape inside <...>', ({ expect }) => { + expect(() => sparqlCodepointEscape('<\\uD800>')).toThrowError(/surrogate/u); + expect(() => sparqlCodepointEscape('<\\uDFFF>')).toThrowError(/surrogate/u); + }); - it('converts \\UXXXXXXXX escapes to unicode characters', ({ expect }) => { - expect(sparqlCodepointEscape('\\U00000041')).toBe('A'); - expect(sparqlCodepointEscape('test\\U00000042end')).toBe('testBend'); + it('throws on raw lone surrogate inside <...>', ({ expect }) => { + expect(() => sparqlCodepointEscape('<\uD800>')).toThrowError(/Invalid unicode codepoint/u); + }); }); - it('handles characters above 0xFFFF (surrogate pairs)', ({ expect }) => { - // U+1F600 (😀) = 0x1F600 = 128512 - expect(sparqlCodepointEscape('\\U0001F600')).toBe('😀'); + describe('within string literals', () => { + it('converts \\uXXXX escapes inside double-quoted strings', ({ expect }) => { + expect(sparqlCodepointEscape('"\\u0041"')).toBe('"A"'); + }); + + it('converts \\uXXXX escapes inside single-quoted strings', ({ expect }) => { + expect(sparqlCodepointEscape('\'\\u0041\'')).toBe('\'A\''); + }); + + it('converts \\uXXXX escapes inside long double-quoted strings', ({ expect }) => { + expect(sparqlCodepointEscape('"""\\u0041"""')).toBe('"""A"""'); + }); + + it('converts \\uXXXX escapes inside long single-quoted strings', ({ expect }) => { + expect(sparqlCodepointEscape('\'\'\'\\u0041\'\'\'')).toBe('\'\'\'A\'\'\''); + }); + + it('throws on surrogate codepoints in \\uXXXX escape inside string', ({ expect }) => { + expect(() => sparqlCodepointEscape('"\\uD83C"')).toThrowError(/surrogate/u); + }); + + it('throws on raw lone high surrogate inside string', ({ expect }) => { + expect(() => sparqlCodepointEscape('"\uD800"')).toThrowError(/Invalid unicode codepoint/u); + }); }); - it('throws on invalid unicode surrogate pairs', ({ expect }) => { - // A high surrogate (D800-DBFF) not followed by a low surrogate - expect(() => sparqlCodepointEscape('\uD800')).toThrowError(/Invalid unicode codepoint/u); + describe('outside string/IRI contexts', () => { + it('throws on \\uXXXX escape in SPARQL keyword position', ({ expect }) => { + expect(() => sparqlCodepointEscape('\\u0041SK {}')).toThrowError(/Codepoint escape not allowed/u); + }); + + it('throws on \\uXXXX escape in variable name', ({ expect }) => { + const query = 'SELECT * { ?a\\u0062c <:p> ?o }'; + expect(() => sparqlCodepointEscape(query)).toThrowError(/Codepoint escape not allowed/u); + }); + + it('does not process \\uXXXX in # comments', ({ expect }) => { + // Comments pass through unchanged; no error thrown + expect(sparqlCodepointEscape('# \\u0041\nSELECT * {}')).toBe('# \\u0041\nSELECT * {}'); + }); + + it('does not enter IRI mode for comparison operators', ({ expect }) => { + // '< ' (with space) is a comparison, not an IRI ref + expect(sparqlCodepointEscape('SELECT * { FILTER(?x < 5) }')).toBe('SELECT * { FILTER(?x < 5) }'); + }); }); it('passes through normal strings unchanged', ({ expect }) => { - expect(sparqlCodepointEscape('hello world')).toBe('hello world'); + expect(sparqlCodepointEscape('SELECT * WHERE { ?s ?p ?o }')).toBe('SELECT * WHERE { ?s ?p ?o }'); + }); + + it('handles unterminated short string at end of input gracefully', ({ expect }) => { + // A string that is never closed (no closing quote before EOF) + expect(sparqlCodepointEscape('"abc')).toBe('"abc'); + expect(sparqlCodepointEscape('\'abc')).toBe('\'abc'); }); }); diff --git a/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-05.json b/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-05.json new file mode 100644 index 00000000..c8aa7943 --- /dev/null +++ b/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-05.json @@ -0,0 +1,38 @@ +{ + "type": "project", + "input": { + "type": "bgp", + "patterns": [ + { + "type": "pattern", + "termType": "Quad", + "subject": { + "termType": "NamedNode", + "value": "http://example/abc" + }, + "predicate": { + "termType": "Variable", + "value": "p" + }, + "object": { + "termType": "Variable", + "value": "o" + }, + "graph": { + "termType": "DefaultGraph", + "value": "" + } + } + ] + }, + "variables": [ + { + "termType": "Variable", + "value": "o" + }, + { + "termType": "Variable", + "value": "p" + } + ] +} diff --git a/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-06.json b/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-06.json new file mode 100644 index 00000000..ba228c3f --- /dev/null +++ b/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-06.json @@ -0,0 +1,38 @@ +{ + "type": "project", + "input": { + "type": "bgp", + "patterns": [ + { + "type": "pattern", + "termType": "Quad", + "subject": { + "termType": "NamedNode", + "value": "http://example/abc" + }, + "predicate": { + "termType": "Variable", + "value": "p" + }, + "object": { + "termType": "Literal", + "value": "abc", + "datatype": { + "termType": "NamedNode", + "value": "http://www.w3.org/2001/XMLSchema#string" + } + }, + "graph": { + "termType": "DefaultGraph", + "value": "" + } + } + ] + }, + "variables": [ + { + "termType": "Variable", + "value": "p" + } + ] +} diff --git a/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-07.json b/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-07.json new file mode 100644 index 00000000..ba228c3f --- /dev/null +++ b/packages/test-utils/statics/algebra/algebra-blank-to-var/sparql12/codepoint-esc-07.json @@ -0,0 +1,38 @@ +{ + "type": "project", + "input": { + "type": "bgp", + "patterns": [ + { + "type": "pattern", + "termType": "Quad", + "subject": { + "termType": "NamedNode", + "value": "http://example/abc" + }, + "predicate": { + "termType": "Variable", + "value": "p" + }, + "object": { + "termType": "Literal", + "value": "abc", + "datatype": { + "termType": "NamedNode", + "value": "http://www.w3.org/2001/XMLSchema#string" + } + }, + "graph": { + "termType": "DefaultGraph", + "value": "" + } + } + ] + }, + "variables": [ + { + "termType": "Variable", + "value": "p" + } + ] +} diff --git a/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-05.json b/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-05.json new file mode 100644 index 00000000..c8aa7943 --- /dev/null +++ b/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-05.json @@ -0,0 +1,38 @@ +{ + "type": "project", + "input": { + "type": "bgp", + "patterns": [ + { + "type": "pattern", + "termType": "Quad", + "subject": { + "termType": "NamedNode", + "value": "http://example/abc" + }, + "predicate": { + "termType": "Variable", + "value": "p" + }, + "object": { + "termType": "Variable", + "value": "o" + }, + "graph": { + "termType": "DefaultGraph", + "value": "" + } + } + ] + }, + "variables": [ + { + "termType": "Variable", + "value": "o" + }, + { + "termType": "Variable", + "value": "p" + } + ] +} diff --git a/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-06.json b/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-06.json new file mode 100644 index 00000000..ba228c3f --- /dev/null +++ b/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-06.json @@ -0,0 +1,38 @@ +{ + "type": "project", + "input": { + "type": "bgp", + "patterns": [ + { + "type": "pattern", + "termType": "Quad", + "subject": { + "termType": "NamedNode", + "value": "http://example/abc" + }, + "predicate": { + "termType": "Variable", + "value": "p" + }, + "object": { + "termType": "Literal", + "value": "abc", + "datatype": { + "termType": "NamedNode", + "value": "http://www.w3.org/2001/XMLSchema#string" + } + }, + "graph": { + "termType": "DefaultGraph", + "value": "" + } + } + ] + }, + "variables": [ + { + "termType": "Variable", + "value": "p" + } + ] +} diff --git a/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-07.json b/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-07.json new file mode 100644 index 00000000..ba228c3f --- /dev/null +++ b/packages/test-utils/statics/algebra/algebra/sparql12/codepoint-esc-07.json @@ -0,0 +1,38 @@ +{ + "type": "project", + "input": { + "type": "bgp", + "patterns": [ + { + "type": "pattern", + "termType": "Quad", + "subject": { + "termType": "NamedNode", + "value": "http://example/abc" + }, + "predicate": { + "termType": "Variable", + "value": "p" + }, + "object": { + "termType": "Literal", + "value": "abc", + "datatype": { + "termType": "NamedNode", + "value": "http://www.w3.org/2001/XMLSchema#string" + } + }, + "graph": { + "termType": "DefaultGraph", + "value": "" + } + } + ] + }, + "variables": [ + { + "termType": "Variable", + "value": "p" + } + ] +} diff --git a/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-05.sparql b/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-05.sparql new file mode 100644 index 00000000..6d5fac24 --- /dev/null +++ b/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-05.sparql @@ -0,0 +1,3 @@ +SELECT ?o ?p WHERE { + ?p ?o . +} \ No newline at end of file diff --git a/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-06.sparql b/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-06.sparql new file mode 100644 index 00000000..1495cb25 --- /dev/null +++ b/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-06.sparql @@ -0,0 +1,3 @@ +SELECT ?p WHERE { + ?p "abc"^^ . +} \ No newline at end of file diff --git a/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-07.sparql b/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-07.sparql new file mode 100644 index 00000000..1495cb25 --- /dev/null +++ b/packages/test-utils/statics/algebra/canonical-sparql/base/sparql12/codepoint-esc-07.sparql @@ -0,0 +1,3 @@ +SELECT ?p WHERE { + ?p "abc"^^ . +} \ No newline at end of file diff --git a/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-05.sparql b/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-05.sparql new file mode 100644 index 00000000..6d5fac24 --- /dev/null +++ b/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-05.sparql @@ -0,0 +1,3 @@ +SELECT ?o ?p WHERE { + ?p ?o . +} \ No newline at end of file diff --git a/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-06.sparql b/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-06.sparql new file mode 100644 index 00000000..1495cb25 --- /dev/null +++ b/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-06.sparql @@ -0,0 +1,3 @@ +SELECT ?p WHERE { + ?p "abc"^^ . +} \ No newline at end of file diff --git a/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-07.sparql b/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-07.sparql new file mode 100644 index 00000000..1495cb25 --- /dev/null +++ b/packages/test-utils/statics/algebra/canonical-sparql/blank-to-var/sparql12/codepoint-esc-07.sparql @@ -0,0 +1,3 @@ +SELECT ?p WHERE { + ?p "abc"^^ . +} \ No newline at end of file diff --git a/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-05.sparql b/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-05.sparql new file mode 100644 index 00000000..85b06bff --- /dev/null +++ b/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-05.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in an IRI + +SELECT * { + ## /abc + ?p ?o . +} diff --git a/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-06.sparql b/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-06.sparql new file mode 100644 index 00000000..a342a3e4 --- /dev/null +++ b/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-06.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in a string + +SELECT * { + ## "abc" + ?p "\U00000061\u0062\U00000063" . +} diff --git a/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-07.sparql b/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-07.sparql new file mode 100644 index 00000000..106f3b6e --- /dev/null +++ b/packages/test-utils/statics/algebra/sparql/sparql12/codepoint-esc-07.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in a string + +SELECT * { + ## 'abc' + ?p '\U00000061\u0062\U00000063' . +} diff --git a/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-05.json b/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-05.json new file mode 100644 index 00000000..716ff12d --- /dev/null +++ b/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-05.json @@ -0,0 +1,97 @@ +{ + "context": [], + "subType": "select", + "where": { + "type": "pattern", + "subType": "group", + "patterns": [ + { + "type": "pattern", + "subType": "bgp", + "triples": [ + { + "type": "triple", + "subject": { + "type": "term", + "subType": "namedNode", + "value": "http://example/abc", + "loc": { + "sourceLocationType": "source", + "start": 53, + "end": 73 + } + }, + "predicate": { + "type": "term", + "subType": "variable", + "value": "p", + "loc": { + "sourceLocationType": "source", + "start": 74, + "end": 76 + } + }, + "object": { + "type": "term", + "subType": "variable", + "value": "o", + "loc": { + "sourceLocationType": "source", + "start": 77, + "end": 79 + } + }, + "annotations": [], + "loc": { + "sourceLocationType": "source", + "start": 53, + "end": 79 + } + } + ], + "loc": { + "sourceLocationType": "source", + "start": 53, + "end": 81 + } + } + ], + "loc": { + "sourceLocationType": "source", + "start": 39, + "end": 83 + } + }, + "solutionModifiers": {}, + "datasets": { + "type": "datasetClauses", + "clauses": [], + "loc": { + "sourceLocationType": "noMaterialize" + } + }, + "variables": [ + { + "type": "wildcard", + "loc": { + "sourceLocationType": "source", + "start": 37, + "end": 38 + } + } + ], + "loc": { + "sourceLocationType": "inlinedSource", + "newSource": "# Codepoint escape in an IRI\n\nSELECT * {\n ## /abc\n ?p ?o .\n}\n", + "start": 0, + "end": 9007199254740991, + "loc": { + "sourceLocationType": "source", + "start": 30, + "end": 83 + }, + "startOnNew": 30, + "endOnNew": 83 + }, + "type": "query" +} diff --git a/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-06.json b/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-06.json new file mode 100644 index 00000000..4e2278f9 --- /dev/null +++ b/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-06.json @@ -0,0 +1,97 @@ +{ + "context": [], + "subType": "select", + "where": { + "type": "pattern", + "subType": "group", + "patterns": [ + { + "type": "pattern", + "subType": "bgp", + "triples": [ + { + "type": "triple", + "subject": { + "type": "term", + "subType": "namedNode", + "value": "http://example/abc", + "loc": { + "sourceLocationType": "source", + "start": 56, + "end": 76 + } + }, + "predicate": { + "type": "term", + "subType": "variable", + "value": "p", + "loc": { + "sourceLocationType": "source", + "start": 77, + "end": 79 + } + }, + "object": { + "type": "term", + "subType": "literal", + "value": "abc", + "loc": { + "sourceLocationType": "source", + "start": 80, + "end": 85 + } + }, + "annotations": [], + "loc": { + "sourceLocationType": "source", + "start": 56, + "end": 85 + } + } + ], + "loc": { + "sourceLocationType": "source", + "start": 56, + "end": 87 + } + } + ], + "loc": { + "sourceLocationType": "source", + "start": 41, + "end": 89 + } + }, + "solutionModifiers": {}, + "datasets": { + "type": "datasetClauses", + "clauses": [], + "loc": { + "sourceLocationType": "noMaterialize" + } + }, + "variables": [ + { + "type": "wildcard", + "loc": { + "sourceLocationType": "source", + "start": 39, + "end": 40 + } + } + ], + "loc": { + "sourceLocationType": "inlinedSource", + "newSource": "# Codepoint escape in a string\n\nSELECT * {\n ## \"abc\"\n ?p \"\\U00000061\\u0062\\U00000063\" .\n}\n", + "start": 0, + "end": 9007199254740991, + "loc": { + "sourceLocationType": "source", + "start": 32, + "end": 89 + }, + "startOnNew": 32, + "endOnNew": 89 + }, + "type": "query" +} diff --git a/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-07.json b/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-07.json new file mode 100644 index 00000000..5b24a39c --- /dev/null +++ b/packages/test-utils/statics/ast/ast-source-tracked/sparql-1-2/codepoint-esc-07.json @@ -0,0 +1,97 @@ +{ + "context": [], + "subType": "select", + "where": { + "type": "pattern", + "subType": "group", + "patterns": [ + { + "type": "pattern", + "subType": "bgp", + "triples": [ + { + "type": "triple", + "subject": { + "type": "term", + "subType": "namedNode", + "value": "http://example/abc", + "loc": { + "sourceLocationType": "source", + "start": 56, + "end": 76 + } + }, + "predicate": { + "type": "term", + "subType": "variable", + "value": "p", + "loc": { + "sourceLocationType": "source", + "start": 77, + "end": 79 + } + }, + "object": { + "type": "term", + "subType": "literal", + "value": "abc", + "loc": { + "sourceLocationType": "source", + "start": 80, + "end": 85 + } + }, + "annotations": [], + "loc": { + "sourceLocationType": "source", + "start": 56, + "end": 85 + } + } + ], + "loc": { + "sourceLocationType": "source", + "start": 56, + "end": 87 + } + } + ], + "loc": { + "sourceLocationType": "source", + "start": 41, + "end": 89 + } + }, + "solutionModifiers": {}, + "datasets": { + "type": "datasetClauses", + "clauses": [], + "loc": { + "sourceLocationType": "noMaterialize" + } + }, + "variables": [ + { + "type": "wildcard", + "loc": { + "sourceLocationType": "source", + "start": 39, + "end": 40 + } + } + ], + "loc": { + "sourceLocationType": "inlinedSource", + "newSource": "# Codepoint escape in a string\n\nSELECT * {\n ## 'abc'\n ?p '\\U00000061\\u0062\\U00000063' .\n}\n", + "start": 0, + "end": 9007199254740991, + "loc": { + "sourceLocationType": "source", + "start": 32, + "end": 89 + }, + "startOnNew": 32, + "endOnNew": 89 + }, + "type": "query" +} diff --git a/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-05.sparql b/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-05.sparql new file mode 100644 index 00000000..4c8a9ac2 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-05.sparql @@ -0,0 +1 @@ +SELECT * WHERE { ?p ?o . } \ No newline at end of file diff --git a/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-06.sparql b/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-06.sparql new file mode 100644 index 00000000..a57c3307 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-06.sparql @@ -0,0 +1 @@ +SELECT * WHERE { ?p "abc" . } \ No newline at end of file diff --git a/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-07.sparql b/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-07.sparql new file mode 100644 index 00000000..a57c3307 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql-generated-compact/sparql-1-2/codepoint-esc-07.sparql @@ -0,0 +1 @@ +SELECT * WHERE { ?p "abc" . } \ No newline at end of file diff --git a/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-05.sparql b/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-05.sparql new file mode 100644 index 00000000..c7c7cebe --- /dev/null +++ b/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-05.sparql @@ -0,0 +1,3 @@ +SELECT * WHERE { + ?p ?o . +} \ No newline at end of file diff --git a/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-06.sparql b/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-06.sparql new file mode 100644 index 00000000..fb034ce7 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-06.sparql @@ -0,0 +1,3 @@ +SELECT * WHERE { + ?p "abc" . +} \ No newline at end of file diff --git a/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-07.sparql b/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-07.sparql new file mode 100644 index 00000000..fb034ce7 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql-generated/sparql-1-2/codepoint-esc-07.sparql @@ -0,0 +1,3 @@ +SELECT * WHERE { + ?p "abc" . +} \ No newline at end of file diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-01-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-01-bad.sparql new file mode 100644 index 00000000..6a771199 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-01-bad.sparql @@ -0,0 +1,2 @@ +# The query `ASK {}` entirely encoded using codepoint escapes. +\u0041\u0053\u004B\u0020\u007B\u007D diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-02-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-02-bad.sparql new file mode 100644 index 00000000..6da7c5e8 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-02-bad.sparql @@ -0,0 +1,5 @@ +# Codepoint escape in prefixed name : PNAME_NS + +SELECT * { + ?x n\0073:abc ?o +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-03-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-03-bad.sparql new file mode 100644 index 00000000..c9a20ba6 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-03-bad.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in prefixed name : PN_LOCAL +PREFIX ns: + +SELECT * { + ?x ns:a\u0062c ?o +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-04-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-04-bad.sparql new file mode 100644 index 00000000..982b5d17 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/codepoint-esc-04-bad.sparql @@ -0,0 +1,7 @@ +# Codepoint escape in variable + +PREIFX ns: + +SELECT * { + ?a\u0062c ns:abc ?o +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-01-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-01-bad.sparql new file mode 100644 index 00000000..4ef52df2 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-01-bad.sparql @@ -0,0 +1,5 @@ +## Surrogate pair. Not legal. + +SELECT * { + ?s ?p "\uD83C\uDCA1" . +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-02-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-02-bad.sparql new file mode 100644 index 00000000..f911f0e6 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-02-bad.sparql @@ -0,0 +1,5 @@ +## Surrogates. Low-high (not a pair). Not legal. + +SELECT * { + ?s ?p "\uDCA1\uD83C" . +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-03-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-03-bad.sparql new file mode 100644 index 00000000..9234b9cb --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-03-bad.sparql @@ -0,0 +1,5 @@ +## High surrogate. Not legal. + +SELECT * { + ?s ?p "Single high surrogate (\uD83C)" +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-04-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-04-bad.sparql new file mode 100644 index 00000000..cf01ae79 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-04-bad.sparql @@ -0,0 +1,5 @@ +## Low surrogate. Not legal. + +SELECT * { + ?s ?p "Single low surrogate (\uDCA1)" +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-05-bad.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-05-bad.sparql new file mode 100644 index 00000000..4cf9f2e1 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2-invalid/surrogate-esc-05-bad.sparql @@ -0,0 +1,5 @@ +## Surrogate pair in IRI. Not legal. + +SELECT * { + ?s ?p +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-05.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-05.sparql new file mode 100644 index 00000000..85b06bff --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-05.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in an IRI + +SELECT * { + ## /abc + ?p ?o . +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-06.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-06.sparql new file mode 100644 index 00000000..a342a3e4 --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-06.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in a string + +SELECT * { + ## "abc" + ?p "\U00000061\u0062\U00000063" . +} diff --git a/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-07.sparql b/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-07.sparql new file mode 100644 index 00000000..106f3b6e --- /dev/null +++ b/packages/test-utils/statics/ast/sparql/sparql-1-2/codepoint-esc-07.sparql @@ -0,0 +1,6 @@ +# Codepoint escape in a string + +SELECT * { + ## 'abc' + ?p '\U00000061\u0062\U00000063' . +}