diff --git a/Makefile b/Makefile index 350566e..4ed287a 100644 --- a/Makefile +++ b/Makefile @@ -11,5 +11,5 @@ lint-fix: test: uv run pytest -test-coverage: +test-cov: uv run coverage run -m pytest -q && uv run coverage report --fail-under=90 diff --git a/README.md b/README.md index 43176a0..d460179 100644 --- a/README.md +++ b/README.md @@ -266,6 +266,7 @@ rendered.normalized_lines # List of normalized lines # Parsing - Full Document rendered.as_json() # Parse as JSON +rendered.as_json(allow_comments=True) # Parse JSON with // and /* */ comments rendered.as_yaml() # Parse as YAML (requires pyyaml) rendered.as_xml(strict=False) # Parse as XML (strict=True for single root) rendered.as_markdown_sections() # Parse markdown headings @@ -273,6 +274,7 @@ rendered.markdown_section("title") # Find markdown section by title # Parsing - Fenced Code Blocks rendered.as_json_blocks() # Extract all ```json blocks +rendered.as_json_blocks(allow_comments=True) # With comment support rendered.as_yaml_blocks() # Extract all ```yaml blocks rendered.as_xml_blocks() # Extract all ```xml blocks @@ -332,32 +334,25 @@ In templates, use comment-based markers to define sections and trace events: Comment markers are automatically transformed when `test_mode=True`. This allows jinjatest to be a dev-only dependency since the comments are valid Jinja syntax that render as empty strings in production. -#### Using with Any Jinja Environment - -You can add instrumentation to any Jinja environment using `instrument()`: +You can also use a pre-configured Jinja environment with `TemplateSpec`: ```python from jinja2 import Environment, FileSystemLoader -from jinjatest import TemplateSpec, instrument +from jinjatest import TemplateSpec -# Patch any existing Jinja environment +# Use your own Jinja environment env = Environment(loader=FileSystemLoader("templates/")) -instrument(env) # Adds `jt` global +env.globals["my_filter"] = lambda x: x.upper() -# Load template with comment markers transformed +# TemplateSpec handles instrumentation automatically spec = TemplateSpec.from_file("my_template.j2", env=env) rendered = spec.render({"name": "World"}) # Check traces after rendering if rendered.has_trace("some_event"): print("Event was triggered") - -# For production, use test_mode=False (markers become no-ops) -instrument(env, test_mode=False) ``` -This is useful when you want to add instrumentation to an existing Jinja setup. - ## Pytest Integration jinjatest provides pytest fixtures automatically: diff --git a/jinjatest/__init__.py b/jinjatest/__init__.py index 170a43e..c7daa11 100644 --- a/jinjatest/__init__.py +++ b/jinjatest/__init__.py @@ -28,20 +28,13 @@ def test_welcome_pro_user(): from jinjatest.asserts import PromptAssertionError, PromptAsserts, assert_no_undefined from jinjatest.instrumentation import ( - AnchorIndex, ProductionInstrumentation, TestInstrumentation, - TraceRecorder, - create_instrumentation, - instrument, ) from jinjatest.markers import ( - MarkerTransform, TemplateMarkers, discover_markers, has_markers, - load_template_with_markers, - transform_markers, ) from jinjatest.parsers import ( FencedBlock, @@ -95,19 +88,12 @@ def test_welcome_pro_user(): "extract_fenced_blocks", "parse_fenced_blocks", "FencedBlock", - # Instrumentation - "instrument", - "create_instrumentation", + # Instrumentation (for type hints) "TestInstrumentation", "ProductionInstrumentation", - "TraceRecorder", - "AnchorIndex", # Markers (comment-based) - "transform_markers", "has_markers", "discover_markers", - "load_template_with_markers", - "MarkerTransform", "TemplateMarkers", # Utilities "normalize_text", diff --git a/jinjatest/instrumentation.py b/jinjatest/instrumentation.py index bc7caec..fc62c86 100644 --- a/jinjatest/instrumentation.py +++ b/jinjatest/instrumentation.py @@ -10,10 +10,6 @@ import re from dataclasses import dataclass, field -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from jinja2 import Environment # Sentinel markers for anchors (using ASCII record separator character) ANCHOR_START = "\x1e" @@ -171,44 +167,3 @@ def create_instrumentation( if test_mode: return TestInstrumentation() return ProductionInstrumentation() - - -def instrument( - env: Environment, - *, - test_mode: bool = True, - global_name: str = "jt", -) -> TestInstrumentation | ProductionInstrumentation: - """Patch a Jinja environment with instrumentation. - - This function adds instrumentation to any Jinja environment, enabling - the use of anchors and traces in templates. Use this when you want to - add jinjatest instrumentation to an existing environment without using - TemplateSpec. - - Args: - env: The Jinja Environment to instrument. - test_mode: If True, enable anchors/traces. If False, no-op mode. - global_name: Name of the template global variable (default: "jt"). - - Returns: - The instrumentation instance added to the environment. - - Example: - from jinja2 import Environment, FileSystemLoader - from jinjatest import instrument - - # Patch any Jinja environment - env = Environment(loader=FileSystemLoader("templates/")) - inst = instrument(env) - - # Now templates can use {{ jt.anchor("section") }} and {{ jt.trace("event") }} - template = env.get_template("my_template.j2") - result = template.render({"name": "World"}) - - # In production, use test_mode=False for no-op instrumentation - instrument(env, test_mode=False) - """ - inst = create_instrumentation(test_mode=test_mode) - env.globals[global_name] = inst - return inst diff --git a/jinjatest/markers.py b/jinjatest/markers.py index 2d53439..0a716b5 100644 --- a/jinjatest/markers.py +++ b/jinjatest/markers.py @@ -158,11 +158,12 @@ def load_template_with_markers( Example: from jinja2 import Environment, FileSystemLoader - from jinjatest import instrument + from jinjatest.instrumentation import create_instrumentation from jinjatest.markers import load_template_with_markers env = Environment(loader=FileSystemLoader("templates/")) - inst = instrument(env) + inst = create_instrumentation(test_mode=True) + env.globals["jt"] = inst template = load_template_with_markers(env, "my_prompt.j2", inst) result = template.render({"name": "World"}) """ diff --git a/jinjatest/parsers/json_parser.py b/jinjatest/parsers/json_parser.py index 5923ad6..a0daf47 100644 --- a/jinjatest/parsers/json_parser.py +++ b/jinjatest/parsers/json_parser.py @@ -14,20 +14,101 @@ def __init__(self, message: str, original_error: Exception | None = None) -> Non self.original_error = original_error -def parse_json(text: str) -> Any: +def _is_escaped(text: str, pos: int) -> bool: + """Check if character at pos is escaped by counting preceding backslashes.""" + num_backslashes = 0 + pos -= 1 + while pos >= 0 and text[pos] == "\\": + num_backslashes += 1 + pos -= 1 + # Odd number of backslashes means the character is escaped + return num_backslashes % 2 == 1 + + +def _strip_json_comments(text: str) -> str: + """Strip C-style comments from JSON text. + + Handles: + - Single-line comments: // comment + - Multi-line comments: /* comment */ + + Properly handles comments inside strings (leaves them untouched), + including escaped quotes and escaped backslashes. + """ + result: list[str] = [] + i = 0 + n = len(text) + in_string = False + + while i < n: + char = text[i] + + # Handle string state - check for unescaped quotes + if char == '"' and not _is_escaped(text, i): + in_string = not in_string + result.append(char) + i += 1 + elif in_string: + # Inside a string - copy everything including escape sequences + result.append(char) + i += 1 + elif char == "/" and i + 1 < n: + next_char = text[i + 1] + if next_char == "/": + # Single-line comment - skip to end of line + i += 2 + while i < n and text[i] != "\n": + i += 1 + elif next_char == "*": + # Multi-line comment - skip to */ + i += 2 + while i + 1 < n and not (text[i] == "*" and text[i + 1] == "/"): + i += 1 + i += 2 # Skip the closing */ + else: + result.append(char) + i += 1 + else: + result.append(char) + i += 1 + + return "".join(result) + + +def parse_json(text: str, *, allow_comments: bool = False) -> Any: """Parse text as JSON. Args: text: The text to parse as JSON. + allow_comments: If True, strip C-style comments (// and /* */) + before parsing. Useful for JSONC-style configuration files. + Default is False for strict JSON compliance. Returns: The parsed JSON value (dict, list, str, int, float, bool, or None). Raises: JSONParseError: If parsing fails. + + Example: + >>> parse_json('{"key": "value"}') + {'key': 'value'} + + >>> parse_json(''' + ... { + ... // This is a comment + ... "key": "value" + ... } + ... ''', allow_comments=True) + {'key': 'value'} """ + text = text.strip() + + if allow_comments: + text = _strip_json_comments(text) + try: - return json.loads(text.strip()) + return json.loads(text) except json.JSONDecodeError as e: raise JSONParseError( f"Failed to parse JSON: {e.msg} at line {e.lineno}, column {e.colno}", diff --git a/jinjatest/rendered.py b/jinjatest/rendered.py index a406c28..c535097 100644 --- a/jinjatest/rendered.py +++ b/jinjatest/rendered.py @@ -8,6 +8,7 @@ import re from dataclasses import dataclass, field +from functools import partial from typing import TYPE_CHECKING, Any from jinjatest.instrumentation import AnchorIndex @@ -170,16 +171,21 @@ def has_section(self, name: str) -> bool: # Parsing methods - def as_json(self) -> Any: + def as_json(self, *, allow_comments: bool = False) -> Any: """Parse the rendered text as JSON. + Args: + allow_comments: If True, strip C-style comments (// and /* */) + before parsing. Useful for JSONC-style configuration files. + Default is False for strict JSON compliance. + Returns: The parsed JSON value. Raises: JSONParseError: If parsing fails. """ - return parse_json(self.clean_text) + return parse_json(self.clean_text, allow_comments=allow_comments) def as_yaml(self) -> Any: """Parse the rendered text as YAML. @@ -236,16 +242,22 @@ def as_xml(self, *, strict: bool = False) -> XMLElement | list[XMLElement]: # Fenced code block parsing methods - def as_json_blocks(self) -> list[Any]: + def as_json_blocks(self, *, allow_comments: bool = False) -> list[Any]: """Extract and parse all ```json fenced code blocks. + Args: + allow_comments: If True, strip C-style comments (// and /* */) + before parsing. Useful for JSONC-style configuration files. + Default is False for strict JSON compliance. + Returns: List of parsed JSON objects, one per block found. Raises: JSONParseError: If any block contains invalid JSON. """ - return parse_fenced_blocks(self.clean_text, "json", parse_json) + parser = partial(parse_json, allow_comments=allow_comments) + return parse_fenced_blocks(self.clean_text, "json", parser) def as_yaml_blocks(self) -> list[Any]: """Extract and parse all ```yaml fenced code blocks. diff --git a/jinjatest/spec.py b/jinjatest/spec.py index 7553756..ee46e20 100644 --- a/jinjatest/spec.py +++ b/jinjatest/spec.py @@ -28,7 +28,6 @@ ProductionInstrumentation, TestInstrumentation, create_instrumentation, - instrument, ) from jinjatest.markers import transform_markers from jinjatest.rendered import RenderedPrompt @@ -234,7 +233,8 @@ def from_string( if env is None: env = create_environment(**env_kwargs) - instrumentation = instrument(env, test_mode=test_mode) + instrumentation = create_instrumentation(test_mode=test_mode) + env.globals["jt"] = instrumentation template = env.from_string(source) return cls( @@ -292,7 +292,8 @@ def from_file( template_paths = [template_dir] + [Path(p) for p in template_paths] env = create_environment(template_paths=template_paths, **env_kwargs) - instrumentation = instrument(env, test_mode=test_mode) + instrumentation = create_instrumentation(test_mode=test_mode) + env.globals["jt"] = instrumentation else: # For provided env, check if already instrumented existing_jt = env.globals.get("jt") @@ -301,7 +302,8 @@ def from_file( ): instrumentation = existing_jt else: - instrumentation = instrument(env, test_mode=test_mode) + instrumentation = create_instrumentation(test_mode=test_mode) + env.globals["jt"] = instrumentation # Determine template name based on how env was obtained # When env is provided or template_dir is explicitly set, use full path diff --git a/tests/test_basic.py b/tests/test_basic.py index 94dcb31..f5064ea 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -138,6 +138,28 @@ def test_as_json(self) -> None: data = rendered.as_json() assert data == {"name": "Test", "active": True} + def test_as_json_with_comments(self) -> None: + """Test parsing JSON with comments when allow_comments=True.""" + spec = TemplateSpec.from_string("""{ + // This is a comment + "name": "{{ name }}", + "value": 42 /* inline comment */ + }""") + rendered = spec.render({"name": "Test"}) + + data = rendered.as_json(allow_comments=True) + assert data == {"name": "Test", "value": 42} + + def test_as_json_rejects_comments_by_default(self) -> None: + """Test that comments cause parse error by default.""" + from jinjatest.parsers.json_parser import JSONParseError + + spec = TemplateSpec.from_string('{"key": "value"} // comment') + rendered = spec.render({}) + + with pytest.raises(JSONParseError): + rendered.as_json() + class TestProUsers: """Test pro user scenarios from the spec.""" @@ -1907,10 +1929,12 @@ def template_dir(self, tmp_path: Path) -> Path: def test_from_file_with_env_preserves_path(self, template_dir: Path) -> None: """When env is provided, full path should be preserved.""" from jinja2 import Environment, FileSystemLoader - from jinjatest import TemplateSpec, instrument + from jinjatest import TemplateSpec + from jinjatest.instrumentation import create_instrumentation env = Environment(loader=FileSystemLoader(str(template_dir))) - instrument(env, test_mode=True) + inst = create_instrumentation(test_mode=True) + env.globals["jt"] = inst # This should work - path relative to env's loader spec = TemplateSpec.from_file("feature/section/template.j2", env=env) @@ -1923,10 +1947,12 @@ def test_from_file_with_env_reuses_instrumentation( ) -> None: """When env is already instrumented, should reuse instrumentation.""" from jinja2 import Environment, FileSystemLoader - from jinjatest import TemplateSpec, instrument + from jinjatest import TemplateSpec + from jinjatest.instrumentation import create_instrumentation env = Environment(loader=FileSystemLoader(str(template_dir))) - original_inst = instrument(env, test_mode=True) + original_inst = create_instrumentation(test_mode=True) + env.globals["jt"] = original_inst spec = TemplateSpec.from_file("feature/section/template.j2", env=env) @@ -1988,10 +2014,12 @@ def test_from_file_template_dir_ignored_when_env_provided( ) -> None: """template_dir should be ignored when env is provided.""" from jinja2 import Environment, FileSystemLoader - from jinjatest import TemplateSpec, instrument + from jinjatest import TemplateSpec + from jinjatest.instrumentation import create_instrumentation env = Environment(loader=FileSystemLoader(str(template_structure))) - instrument(env, test_mode=True) + inst = create_instrumentation(test_mode=True) + env.globals["jt"] = inst # template_dir is provided but should be ignored since env is provided spec = TemplateSpec.from_file( @@ -2023,10 +2051,12 @@ def template_with_markers(self, tmp_path: Path) -> Path: def test_from_file_with_env_and_markers(self, template_with_markers: Path) -> None: """Test that comment markers work when env is provided.""" from jinja2 import Environment, FileSystemLoader - from jinjatest import TemplateSpec, instrument + from jinjatest import TemplateSpec + from jinjatest.instrumentation import create_instrumentation env = Environment(loader=FileSystemLoader(str(template_with_markers))) - instrument(env, test_mode=True) + inst = create_instrumentation(test_mode=True) + env.globals["jt"] = inst spec = TemplateSpec.from_file("v2/marked.j2", env=env) rendered = spec.render({"name": "World"}) diff --git a/tests/test_fenced_blocks.py b/tests/test_fenced_blocks.py index 8b85b1e..6caa1ad 100644 --- a/tests/test_fenced_blocks.py +++ b/tests/test_fenced_blocks.py @@ -173,6 +173,23 @@ def test_as_json_blocks(self): assert blocks[0] == {"name": "Alice"} assert blocks[1] == {"name": "Bob"} + def test_as_json_blocks_with_comments(self): + template = """ +```json +{ + // User config + "name": "Alice", + "role": "admin" /* important */ +} +``` +""" + spec = TemplateSpec.from_string(template) + rendered = spec.render({}) + + blocks = rendered.as_json_blocks(allow_comments=True) + assert len(blocks) == 1 + assert blocks[0] == {"name": "Alice", "role": "admin"} + def test_as_yaml_blocks(self): template = """ Config 1: diff --git a/tests/test_imports.py b/tests/test_imports.py index 0b641aa..efb605f 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -67,18 +67,12 @@ def test_fenced_block_exports(self): def test_instrumentation_exported(self): from jinjatest import ( - create_instrumentation, TestInstrumentation, ProductionInstrumentation, - TraceRecorder, - AnchorIndex, ) - assert callable(create_instrumentation) assert TestInstrumentation is not None assert ProductionInstrumentation is not None - assert TraceRecorder is not None - assert AnchorIndex is not None def test_utilities_exported(self): from jinjatest import normalize_text, create_environment diff --git a/tests/test_instrumentation.py b/tests/test_instrumentation.py index f0bf02b..b52a03e 100644 --- a/tests/test_instrumentation.py +++ b/tests/test_instrumentation.py @@ -357,83 +357,3 @@ def test_production_instrumentation_clear(self) -> None: inst = ProductionInstrumentation() inst.clear() # Should not raise - - -class TestInstrumentFunction: - """Test the instrument() function for patching Jinja environments.""" - - def test_instrument_adds_jt_global(self) -> None: - """Test that instrument() adds jt global to environment.""" - from jinja2 import Environment - - from jinjatest import instrument - - env = Environment() - inst = instrument(env) - - assert "jt" in env.globals - assert env.globals["jt"] is inst - - def test_instrument_test_mode_true(self) -> None: - """Test instrument() with test_mode=True enables anchors/traces.""" - from jinja2 import Environment - - from jinjatest import instrument - from jinjatest.instrumentation import TestInstrumentation - - env = Environment() - inst = instrument(env, test_mode=True) - - assert isinstance(inst, TestInstrumentation) - - # Test that anchors work - template = env.from_string("{{ jt.anchor('test') }}Hello") - result = template.render() - assert "Hello" in result - - def test_instrument_test_mode_false(self) -> None: - """Test instrument() with test_mode=False returns no-op instrumentation.""" - from jinja2 import Environment - - from jinjatest import instrument - from jinjatest.instrumentation import ProductionInstrumentation - - env = Environment() - inst = instrument(env, test_mode=False) - - assert isinstance(inst, ProductionInstrumentation) - - # Test that anchors are no-ops - template = env.from_string("{{ jt.anchor('test') }}Hello") - result = template.render() - assert result == "Hello" # No anchor marker - - def test_instrument_custom_global_name(self) -> None: - """Test instrument() with custom global_name.""" - from jinja2 import Environment - - from jinjatest import instrument - - env = Environment() - instrument(env, global_name="instr") - - assert "instr" in env.globals - assert "jt" not in env.globals - - template = env.from_string("{{ instr.anchor('test') }}Hello") - result = template.render() - assert "Hello" in result - - def test_instrument_with_existing_environment(self) -> None: - """Test instrument() works with pre-configured environments.""" - from jinja2 import Environment - - from jinjatest import instrument - - env = Environment() - env.globals["my_var"] = "existing" - - instrument(env) - - assert env.globals["my_var"] == "existing" # Preserved - assert "jt" in env.globals # Added diff --git a/tests/test_markers.py b/tests/test_markers.py index dc0b3b6..d1606ad 100644 --- a/tests/test_markers.py +++ b/tests/test_markers.py @@ -6,14 +6,9 @@ import pytest from jinja2 import Environment, FileSystemLoader -from jinjatest import ( - TemplateSpec, - discover_markers, - has_markers, - instrument, - load_template_with_markers, - transform_markers, -) +from jinjatest import TemplateSpec, discover_markers, has_markers +from jinjatest.instrumentation import create_instrumentation +from jinjatest.markers import load_template_with_markers, transform_markers class TestMarkerTransformation: @@ -315,7 +310,8 @@ def test_load_and_transform(self) -> None: ) env = Environment(loader=FileSystemLoader(tmpdir)) - inst = instrument(env) + inst = create_instrumentation(test_mode=True) + env.globals["jt"] = inst template = load_template_with_markers(env, "prompt.j2", inst) result = template.render({"user_input": "Hello"}) diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 64938bc..7b13eea 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -2,10 +2,153 @@ import pytest -from jinjatest.parsers.json_parser import JSONParseError, parse_json +from jinjatest.parsers.json_parser import ( + JSONParseError, + _strip_json_comments, + parse_json, +) from jinjatest.parsers.yaml_parser import YAMLParseError, parse_yaml +class TestStripJsonComments: + """Tests for _strip_json_comments function with edge cases.""" + + def test_no_comments(self): + """Plain JSON without comments passes through unchanged.""" + text = '{"key": "value"}' + assert _strip_json_comments(text) == text + + def test_single_line_comment_at_end(self): + """Single-line comment at end of line is removed.""" + text = '{"key": "value"} // comment' + assert _strip_json_comments(text) == '{"key": "value"} ' + + def test_single_line_comment_own_line(self): + """Single-line comment on its own line is removed.""" + text = '{\n// comment\n"key": "value"\n}' + assert _strip_json_comments(text) == '{\n\n"key": "value"\n}' + + def test_multi_line_comment(self): + """Multi-line comment is removed.""" + text = '{"key": /* comment */ "value"}' + assert _strip_json_comments(text) == '{"key": "value"}' + + def test_multi_line_comment_spanning_lines(self): + """Multi-line comment spanning multiple lines is removed.""" + text = '{\n/* line1\nline2\nline3 */\n"key": "value"}' + assert _strip_json_comments(text) == '{\n\n"key": "value"}' + + def test_url_in_string_preserved(self): + """URLs inside strings are not corrupted.""" + text = '{"url": "https://example.com/path"}' + assert _strip_json_comments(text) == text + + def test_double_slash_in_string_preserved(self): + """Double slashes inside strings are preserved.""" + text = '{"note": "use // for comments"}' + assert _strip_json_comments(text) == text + + def test_block_comment_pattern_in_string_preserved(self): + """Block comment patterns inside strings are preserved.""" + text = '{"note": "use /* */ for blocks"}' + assert _strip_json_comments(text) == text + + def test_escaped_quote_in_string(self): + """Escaped quotes inside strings don't break parsing.""" + text = r'{"say": "hello \"world\"", "x": 1} // comment' + result = _strip_json_comments(text) + assert result == r'{"say": "hello \"world\"", "x": 1} ' + + def test_escaped_quote_followed_by_comment_pattern(self): + """Escaped quote followed by // doesn't start a comment.""" + text = r'{"msg": "say \"hi\" // ok"}' + assert _strip_json_comments(text) == text + + def test_backslash_in_string(self): + """Backslashes in strings are handled correctly.""" + text = r'{"path": "C:\\Users\\name"}' + assert _strip_json_comments(text) == text + + def test_empty_string_with_comment_after(self): + """Empty string followed by comment.""" + text = '{"empty": ""} // comment' + assert _strip_json_comments(text) == '{"empty": ""} ' + + def test_multiple_strings_same_line(self): + """Multiple strings on same line with comment.""" + text = '{"a": "x", "b": "y"} // comment' + assert _strip_json_comments(text) == '{"a": "x", "b": "y"} ' + + def test_string_with_newline_escape(self): + """String containing escaped newline.""" + text = r'{"text": "line1\nline2"} // comment' + assert _strip_json_comments(text) == r'{"text": "line1\nline2"} ' + + def test_consecutive_single_line_comments(self): + """Multiple single-line comments in a row.""" + text = '{\n// comment 1\n// comment 2\n"key": "value"}' + assert _strip_json_comments(text) == '{\n\n\n"key": "value"}' + + def test_consecutive_slashes_outside_string(self): + """//// is treated as comment starting with //.""" + text = '{"key": "value"} //// comment' + assert _strip_json_comments(text) == '{"key": "value"} ' + + def test_empty_block_comment(self): + """Empty block comment /**/ is removed.""" + text = '{"key": /**/ "value"}' + assert _strip_json_comments(text) == '{"key": "value"}' + + def test_nested_comment_pattern_in_block(self): + """// inside /* */ is part of the block comment.""" + text = '{"key": /* // nested */ "value"}' + assert _strip_json_comments(text) == '{"key": "value"}' + + def test_comment_at_very_start(self): + """Comment at the very start of input.""" + text = '// comment\n{"key": "value"}' + assert _strip_json_comments(text) == '\n{"key": "value"}' + + def test_comment_at_very_end(self): + """Comment at the very end with no newline.""" + text = '{"key": "value"} // comment' + assert _strip_json_comments(text) == '{"key": "value"} ' + + def test_only_comments(self): + """Input that is only comments.""" + text = "// comment 1\n/* comment 2 */" + assert _strip_json_comments(text) == "\n" + + def test_string_ending_with_backslash_before_real_quote(self): + """String with backslash that is NOT escaping the closing quote.""" + # In JSON: "foo\\" means the string "foo\" (backslash at end) + # The \\\\ in Python raw string = \\ in actual string = \ in JSON string value + text = r'{"path": "foo\\"} // comment' + result = _strip_json_comments(text) + # The comment should be removed + assert result == r'{"path": "foo\\"} ' + + def test_complex_mixed_case(self): + """Complex case with multiple strings, escapes, and comments.""" + text = """{ + // Config file + "url": "https://example.com", /* API endpoint */ + "pattern": "use // for docs", + "escape": "say \\"hello\\"", + "path": "C:\\\\Users" // Windows path + }""" + result = _strip_json_comments(text) + # Verify strings are intact + assert '"url": "https://example.com"' in result + assert '"pattern": "use // for docs"' in result + assert r'"escape": "say \"hello\""' in result + assert r'"path": "C:\\Users"' in result + # Verify comments are removed + assert "// Config file" not in result + assert "/* API endpoint */" not in result + assert "// Windows path" not in result + + class TestJsonParser: """Tests for JSON parser.""" @@ -69,6 +212,81 @@ def test_parse_nested_json(self): result = parse_json('{"outer": {"inner": [1, 2, 3]}}') assert result == {"outer": {"inner": [1, 2, 3]}} + def test_parse_json_with_single_line_comments(self): + """Test parsing JSON with // comments when allow_comments=True.""" + json_text = """{ + // This is a comment + "name": "Alice", + "age": 30 // inline comment + }""" + result = parse_json(json_text, allow_comments=True) + assert result == {"name": "Alice", "age": 30} + + def test_parse_json_with_multi_line_comments(self): + """Test parsing JSON with /* */ comments when allow_comments=True.""" + json_text = """{ + /* This is a + multi-line comment */ + "name": "Bob", + "active": true + }""" + result = parse_json(json_text, allow_comments=True) + assert result == {"name": "Bob", "active": True} + + def test_parse_json_with_mixed_comments(self): + """Test parsing JSON with both comment styles when allow_comments=True.""" + json_text = """{ + // Single line comment + "items": [ + /* block comment */ 1, + 2, // trailing comment + 3 + ] + }""" + result = parse_json(json_text, allow_comments=True) + assert result == {"items": [1, 2, 3]} + + def test_parse_json_comments_disabled_by_default(self): + """Test that comments cause parse error when allow_comments=False (default).""" + json_text = '{"key": "value"} // comment' + with pytest.raises(JSONParseError): + parse_json(json_text) + + def test_parse_json_allow_comments_false_explicit(self): + """Test that allow_comments=False rejects comments.""" + json_text = '{"key": "value"} // comment' + with pytest.raises(JSONParseError): + parse_json(json_text, allow_comments=False) + + def test_parse_json_preserves_url_in_string(self): + """Test that URLs inside strings are not corrupted by comment stripping.""" + json_text = '{"url": "https://example.com/path"}' + result = parse_json(json_text, allow_comments=True) + assert result == {"url": "https://example.com/path"} + + def test_parse_json_preserves_comment_like_patterns_in_strings(self): + """Test that // and /* inside strings are preserved.""" + json_text = """{ + "note": "Use // for single-line comments", + "other": "And /* */ for blocks" + }""" + result = parse_json(json_text, allow_comments=True) + assert result["note"] == "Use // for single-line comments" + assert result["other"] == "And /* */ for blocks" + + def test_parse_json_mixed_real_comments_and_string_patterns(self): + """Test real comments are removed but string content preserved.""" + json_text = """{ + // This comment should be removed + "url": "https://example.com", /* also removed */ + "desc": "Visit // our site" + }""" + result = parse_json(json_text, allow_comments=True) + assert result == { + "url": "https://example.com", + "desc": "Visit // our site", + } + class TestYamlParser: """Tests for YAML parser."""