From e9132b920d78b5e960ec5eefec8e39f32e53101f Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 17 Apr 2025 12:15:50 +0200 Subject: [PATCH 1/7] Add `from_gql_create` VG constructor --- docs/source/api-reference/from_gql_create.rst | 5 + docs/source/api-reference/from_neo4j.rst | 2 +- python-wrapper/src/neo4j_viz/gql_create.py | 298 ++++++++++++++++++ python-wrapper/tests/test_gql_create.py | 130 ++++++++ 4 files changed, 434 insertions(+), 1 deletion(-) create mode 100644 docs/source/api-reference/from_gql_create.rst create mode 100644 python-wrapper/src/neo4j_viz/gql_create.py create mode 100644 python-wrapper/tests/test_gql_create.py diff --git a/docs/source/api-reference/from_gql_create.rst b/docs/source/api-reference/from_gql_create.rst new file mode 100644 index 00000000..8fc3ac57 --- /dev/null +++ b/docs/source/api-reference/from_gql_create.rst @@ -0,0 +1,5 @@ +Import from a GQL CREATE query +------------------------------ + +.. automodule:: neo4j_viz.gql_create + :members: diff --git a/docs/source/api-reference/from_neo4j.rst b/docs/source/api-reference/from_neo4j.rst index f079da26..f4c77fdb 100644 --- a/docs/source/api-reference/from_neo4j.rst +++ b/docs/source/api-reference/from_neo4j.rst @@ -1,5 +1,5 @@ Import from Neo4j ------------------------------------------------- +----------------- .. 
automodule:: neo4j_viz.neo4j :members: diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py new file mode 100644 index 00000000..e18b714c --- /dev/null +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -0,0 +1,298 @@ +import re +import uuid +from typing import Any, Optional + +from neo4j_viz import Node, Relationship, VisualizationGraph + + +def _parse_value(value_str: str) -> Any: + value_str = value_str.strip() + if not value_str: + return None + + # Parse object + if value_str.startswith("{") and value_str.endswith("}"): + inner = value_str[1:-1].strip() + result = {} + depth = 0 + in_string = None + start_idx = 0 + for i, ch in enumerate(inner): + if in_string is None: + if ch in ["'", '"']: + in_string = ch + elif ch in ["{", "["]: + depth += 1 + elif ch in ["}", "]"]: + depth -= 1 + elif ch == "," and depth == 0: + segment = inner[start_idx:i].strip() + if ":" not in segment: + return None + k, v = segment.split(":", 1) + k = k.strip().strip("'\"") + result[k] = _parse_value(v) + start_idx = i + 1 + else: + if ch == in_string: + in_string = None + if inner[start_idx:]: + segment = inner[start_idx:].strip() + if ":" not in segment: + return None + k, v = segment.split(":", 1) + k = k.strip().strip("'\"") + result[k] = _parse_value(v) + return result + + # Parse list + if value_str.startswith("[") and value_str.endswith("]"): + inner = value_str[1:-1].strip() + items = [] + depth = 0 + in_string = None + start_idx = 0 + for i, ch in enumerate(inner): + if in_string is None: + if ch in ["'", '"']: + in_string = ch + elif ch in ["{", "["]: + depth += 1 + elif ch in ["}", "]"]: + depth -= 1 + elif ch == "," and depth == 0: + items.append(_parse_value(inner[start_idx:i])) + start_idx = i + 1 + else: + if ch == in_string: + in_string = None + if inner[start_idx:]: + items.append(_parse_value(inner[start_idx:])) + return items + + # Parse boolean, float, int, or string + if re.match(r"^-?\d+$", value_str): + return 
int(value_str) + if re.match(r"^-?\d+\.\d+$", value_str): + return float(value_str) + if value_str.lower() == "true": + return True + if value_str.lower() == "false": + return False + if value_str.lower() == "null": + return None + return value_str.strip("'\"") + + +def _get_snippet(q: str, idx: int, context: int = 15) -> str: + start = max(0, idx - context) + end = min(len(q), idx + context) + return q[start:end].replace("\n", " ") + + +def from_gql_create(query: str) -> VisualizationGraph: + """ + Parse a GQL CREATE query and return a VisualizationGraph object representing the graph it creates. + + All node and relationship properties will be included in the visualization graph. + If the properties are named as the fields of the `Node` or `Relationship` classes, they will be included as + top level fields of the respective objects. Otherwise, they will be included in the `properties` dictionary. + Additionally, a "labels" property will be added for nodes and a "type" property for relationships. + + Please note that this function is not a full GQL parser, it only handles CREATE queries that do not contain + other clauses like MATCH, WHERE, RETURN, etc, or any Cypher function calls. + It also does not handle all possible GQL syntax, but it should work for most common cases. 
+ + Parameters + ---------- + query : str + The GQL CREATE query to parse + """ + + query = query.strip() + # Case-insensitive check that 'CREATE' is the first non-whitespace token + if not re.match(r"(?i)^create\b", query): + raise ValueError("Query must begin with 'CREATE' (case insensitive).") + + def parse_prop_str( + prop_str: str, prop_start: int, top_level_keys: set[str] + ) -> tuple[dict[str, Any], dict[str, Any]]: + top_level: dict[str, Any] = {} + props: dict[str, Any] = {} + depth = 0 + in_string = None + start_idx = 0 + for i, ch in enumerate(prop_str): + if in_string is None: + if ch in ["'", '"']: + in_string = ch + elif ch in ["{", "["]: + depth += 1 + elif ch in ["}", "]"]: + depth -= 1 + elif ch == "," and depth == 0: + pair = prop_str[start_idx:i].strip() + if ":" not in pair: + snippet = _get_snippet(query, prop_start + start_idx) + raise ValueError(f"Property syntax error near: `{snippet}`.") + k, v = pair.split(":", 1) + k = k.strip().strip("'\"") + + if k in top_level_keys: + top_level[k] = _parse_value(v) + else: + props[k] = _parse_value(v) + + start_idx = i + 1 + else: + if ch == in_string: + in_string = None + + if prop_str[start_idx:]: + pair = prop_str[start_idx:].strip() + if ":" not in pair: + snippet = _get_snippet(query, prop_start + start_idx) + raise ValueError(f"Property syntax error near: `{snippet}`.") + k, v = pair.split(":", 1) + k = k.strip().strip("'\"") + + if k in top_level_keys: + top_level[k] = _parse_value(v) + else: + props[k] = _parse_value(v) + + return top_level, props + + def parse_labels_and_props( + s: str, top_level_keys: set[str] + ) -> tuple[Optional[str], dict[str, Any], dict[str, Any]]: + prop_match = re.search(r"\{(.*)\}", s) + prop_str = "" + if prop_match: + prop_str = prop_match.group(1) + prop_start = query.index(prop_str, query.index(s)) + s = s[: prop_match.start()].strip() + alias_labels = re.split(r"[:&]", s) + raw_alias = alias_labels[0].strip() + final_alias = raw_alias if raw_alias else None + + 
if prop_str: + top_level, props = parse_prop_str(prop_str, prop_start, top_level_keys) + else: + top_level = {} + props = {} + + label_list = [lbl.strip() for lbl in alias_labels[1:]] + if "labels" in props: + props["__labels"] = props["labels"] + props["labels"] = sorted(label_list) + + return final_alias, top_level, props + + nodes = [] + relationships = [] + alias_to_id = {} + anonymous_count = 0 + + query = re.sub(r"(?i)^create\s*", "", query, count=1).rstrip(";").strip() + parts = [] + paren_level = 0 + bracket_level = 0 + current: list[str] = [] + for i, char in enumerate(query): + if char == "(": + paren_level += 1 + elif char == ")": + paren_level -= 1 + if paren_level < 0: + snippet = _get_snippet(query, i) + raise ValueError(f"Unbalanced parentheses near: `{snippet}`.") + if char == "[": + bracket_level += 1 + elif char == "]": + bracket_level -= 1 + if bracket_level < 0: + snippet = _get_snippet(query, i) + raise ValueError(f"Unbalanced square brackets near: `{snippet}`.") + if char == "," and paren_level == 0 and bracket_level == 0: + parts.append("".join(current).strip()) + current = [] + else: + current.append(char) + parts.append("".join(current).strip()) + if paren_level != 0: + snippet = _get_snippet(query, len(query) - 1) + raise ValueError(f"Unbalanced parentheses near: `{snippet}`.") + if bracket_level != 0: + snippet = _get_snippet(query, len(query) - 1) + raise ValueError(f"Unbalanced square brackets near: `{snippet}`.") + + node_pattern = re.compile(r"^\(([^)]+)\)$") + rel_pattern = re.compile(r"^\(([^)]+)\)-\s*\[\s*:(\w+)\s*(\{[^}]*\})?\s*\]->\(([^)]+)\)$") + + node_top_level_keys = set(Node.model_fields.keys()) + node_top_level_keys.remove("id") + + rel_top_level_keys = set(Relationship.model_fields.keys()) + rel_top_level_keys.remove("id") + rel_top_level_keys.remove("source") + rel_top_level_keys.remove("target") + + empty_set: set[str] = set() + + for part in parts: + node_m = node_pattern.match(part) + if node_m: + alias_labels_props = 
node_m.group(1).strip() + alias, top_level, props = parse_labels_and_props(alias_labels_props, node_top_level_keys) + if not alias: + alias = f"_anon_{anonymous_count}" + anonymous_count += 1 + if alias not in alias_to_id: + alias_to_id[alias] = str(uuid.uuid4()) + nodes.append(Node(id=alias_to_id[alias], **top_level, properties=props)) + else: + rel_m = rel_pattern.match(part) + if rel_m: + left_node = rel_m.group(1).strip() + rel_type = rel_m.group(2).replace(":", "").strip() + right_node = rel_m.group(4).strip() + + left_alias, _, _ = parse_labels_and_props(left_node, empty_set) + if not left_alias or left_alias not in alias_to_id: + snippet = _get_snippet(query, query.index(left_node)) + raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.") + + right_alias, _, _ = parse_labels_and_props(right_node, empty_set) + if not right_alias or right_alias not in alias_to_id: + snippet = _get_snippet(query, query.index(right_node)) + raise ValueError(f"Relationship references unknown node alias: '{right_alias}' near: `{snippet}`.") + + rel_id = str(uuid.uuid4()) + rel_props_str = rel_m.group(3) or "" + if rel_props_str: + inner_str = rel_props_str.strip("{}").strip() + prop_start = query.index(inner_str, query.index(inner_str)) + top_level, props = parse_prop_str(inner_str, prop_start, rel_top_level_keys) + else: + top_level = {} + props = {} + + if "type" in props: + props["__type"] = props["type"] + props["type"] = rel_type + + relationships.append( + Relationship( + id=rel_id, + source=alias_to_id[left_alias], + target=alias_to_id[right_alias], + **top_level, + properties=props, + ) + ) + else: + snippet = part[:30] + raise ValueError(f"Invalid element in CREATE near: `{snippet}`.") + + return VisualizationGraph(nodes=nodes, relationships=relationships) diff --git a/python-wrapper/tests/test_gql_create.py b/python-wrapper/tests/test_gql_create.py new file mode 100644 index 00000000..d8ad28bf --- /dev/null +++ 
b/python-wrapper/tests/test_gql_create.py @@ -0,0 +1,130 @@ +from typing import Any + +import pytest + +from neo4j_viz.gql_create import from_gql_create + + +def test_from_gql_create() -> None: + query = """ + CREATE + (a:User {name: 'Alice', age: 23, labels: ['Happy'], "id": 42}), + (b:User:person {name: "Bridget", age: 34, "caption": "Bridget"}), + (wizardMan:User {name: 'Charles: The wizard, man', hello: true, height: NULL}), + (d:User), + (a)-[:LINK {weight: 0.5}]->(b), + (e:User {age: 67, my_map: {key: 'value', key2: 3.14, key3: [1, 2, 3], key4: {a: 1, b: null}}}), + (:User {age: 42, pets: ['cat', false, 'dog']}), + (f:User&Person + {name: 'Fawad', age: 78}), + (a)-[:LINK {weight: 4}]->(wizardMan), + (e)-[:LINK]->(d), + (e)-[:OTHER_LINK {weight: -2, type: 1, source: 1337, caption: "Balloon"}]->(f); + """ + expected_node_dicts: list[dict[str, dict[str, Any]]] = [ + { + "top_level": {}, + "properties": {"name": "Alice", "age": 23, "labels": ["User"], "__labels": ["Happy"], "id": 42}, + }, + { + "top_level": {"caption": "Bridget"}, + "properties": {"name": "Bridget", "age": 34, "labels": ["User", "person"]}, + }, + { + "top_level": {}, + "properties": {"name": "Charles: The wizard, man", "hello": True, "height": None, "labels": ["User"]}, + }, + {"top_level": {}, "properties": {"labels": ["User"]}}, + { + "top_level": {}, + "properties": { + "age": 67, + "my_map": {"key": "value", "key2": 3.14, "key3": [1, 2, 3], "key4": {"a": 1, "b": None}}, + "labels": ["User"], + }, + }, + {"top_level": {}, "properties": {"age": 42, "pets": ["cat", False, "dog"], "labels": ["User"]}}, + {"top_level": {}, "properties": {"name": "Fawad", "age": 78, "labels": ["Person", "User"]}}, + ] + + VG = from_gql_create(query) + + assert len(VG.nodes) == len(expected_node_dicts) + for i, exp_node in enumerate(expected_node_dicts): + created_node = VG.nodes[i] + + assert created_node.model_dump(exclude_none=True, exclude={"properties", "id"}) == exp_node["top_level"] + assert 
created_node.properties == exp_node["properties"] + + expected_relationships_dicts: list[dict[str, Any]] = [ + {"source_idx": 0, "target_idx": 1, "top_level": {}, "properties": {"weight": 0.5, "type": "LINK"}}, + {"source_idx": 0, "target_idx": 2, "top_level": {}, "properties": {"weight": 4, "type": "LINK"}}, + {"source_idx": 4, "target_idx": 3, "top_level": {}, "properties": {"type": "LINK"}}, + { + "source_idx": 4, + "target_idx": 6, + "top_level": {"caption": "Balloon"}, + "properties": {"weight": -2, "type": "OTHER_LINK", "__type": 1, "source": 1337}, + }, + ] + + assert len(VG.relationships) == len(expected_relationships_dicts) + for i, exp_rel in enumerate(expected_relationships_dicts): + created_rel = VG.relationships[i] + assert created_rel.source == VG.nodes[exp_rel["source_idx"]].id + assert created_rel.target == VG.nodes[exp_rel["target_idx"]].id + assert ( + created_rel.model_dump(exclude_none=True, exclude={"properties", "id", "source", "target"}) + == exp_rel["top_level"] + ) + assert created_rel.properties == exp_rel["properties"] + + +def test_unbalanced_parentheses_snippet() -> None: + query = "CREATE (a:User, (b:User })" + with pytest.raises(ValueError, match=r"Unbalanced parentheses near: `.*\(b:User.*"): + from_gql_create(query) + + +def test_unbalanced_brackets_snippet() -> None: + query = "CREATE (a)-[:LINK {weight: 0.5}->(b)" + with pytest.raises(ValueError, match=r"Unbalanced square brackets near: `eight: 0.5}->\(b\)`."): + from_gql_create(query) + + +def test_node_property_syntax_error_snippet1() -> None: + query = "CREATE (a:User {x, y:4})" + with pytest.raises(ValueError, match=r"Property syntax error near: `.*x, y.*"): + from_gql_create(query) + + +def test_node_property_syntax_error_snippet2() -> None: + query = "CREATE (a:User {x:5,, y:4})" + with pytest.raises(ValueError, match=r"Property syntax error near: `.*x:5,, y.*"): + from_gql_create(query) + + +def test_invalid_element_in_create_snippet() -> None: + query = "CREATE 
[not_a_node]" + with pytest.raises(ValueError, match=r"Invalid element in CREATE near: `\[not_a_node.*"): + from_gql_create(query) + + +def test_rel_property_syntax_error_snippet() -> None: + query = "CREATE (a:User), (b:User), (a)-[:LINK {weight0.5}]->(b)" + with pytest.raises(ValueError, match=r"Property syntax error near: `\), \(a\)-\[:LINK {weight0.5}\]->\(b`."): + from_gql_create(query) + + +def test_unknown_node_alias() -> None: + query = "CREATE (a)-[:LINK {weight0.5}]->(b)" + with pytest.raises( + ValueError, match=r"Relationship references unknown node alias: 'a' near: `\(a\)-\[:LINK {weig`" + ): + from_gql_create(query) + + +def test_no_create_keyword() -> None: + query = "(a:User {y:4})" + with pytest.raises(ValueError, match=r"Query must begin with 'CREATE' \(case insensitive\)."): + from_gql_create(query) From 70f9241c70f48ddb83b9973a62c857aa6aca0282 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Tue, 22 Apr 2025 16:08:55 +0200 Subject: [PATCH 2/7] Add more params to `from_gql_create` --- python-wrapper/src/neo4j_viz/gql_create.py | 43 +++++++++++- python-wrapper/src/neo4j_viz/neo4j.py | 2 +- python-wrapper/tests/test_gql_create.py | 79 +++++++++++++++++++++- 3 files changed, 119 insertions(+), 5 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py index e18b714c..e1de281d 100644 --- a/python-wrapper/src/neo4j_viz/gql_create.py +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -90,7 +90,13 @@ def _get_snippet(q: str, idx: int, context: int = 15) -> str: return q[start:end].replace("\n", " ") -def from_gql_create(query: str) -> VisualizationGraph: +def from_gql_create( + query: str, + size_property: Optional[str] = None, + node_caption: Optional[str] = "labels", + relationship_caption: Optional[str] = "type", + node_radius_min_max: Optional[tuple[float, float]] = (3, 60), +) -> VisualizationGraph: """ Parse a GQL CREATE query and return a VisualizationGraph object representing the 
graph it creates. @@ -107,6 +113,15 @@ def from_gql_create(query: str) -> VisualizationGraph: ---------- query : str The GQL CREATE query to parse + size_property : str, optional + Property to use for node size, by default None. + node_caption : str, optional + Property to use as the node caption, by default the node labels will be used. + relationship_caption : str, optional + Property to use as the relationship caption, by default the relationship type will be used. + node_radius_min_max : tuple[float, float], optional + Minimum and maximum node radius, by default (3, 60). + To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range. """ query = query.strip() @@ -295,4 +310,28 @@ def parse_labels_and_props( snippet = part[:30] raise ValueError(f"Invalid element in CREATE near: `{snippet}`.") - return VisualizationGraph(nodes=nodes, relationships=relationships) + if size_property is not None: + for node in nodes: + node.size = node.properties.get(size_property) + + if node_caption is not None: + for node in nodes: + if node_caption == "labels": + if len(node.properties["labels"]) > 0: + node.caption = ":".join([label for label in node.properties["labels"]]) + else: + node.caption = str(node.properties.get(node_caption)) + + if relationship_caption is not None: + for rel in relationships: + if relationship_caption == "type": + rel.caption = rel.properties["type"] + else: + rel.caption = str(rel.properties.get(relationship_caption)) + + VG = VisualizationGraph(nodes=nodes, relationships=relationships) + + if (node_radius_min_max is not None) and (size_property is not None): + VG.resize_nodes(node_radius_min_max=node_radius_min_max) + + return VG diff --git a/python-wrapper/src/neo4j_viz/neo4j.py b/python-wrapper/src/neo4j_viz/neo4j.py index 1ab21955..83b5d75c 100644 --- a/python-wrapper/src/neo4j_viz/neo4j.py +++ b/python-wrapper/src/neo4j_viz/neo4j.py @@ -56,7 +56,7 @@ def from_neo4j( VG = VisualizationGraph(nodes, 
relationships) - if node_radius_min_max and size_property is not None: + if (node_radius_min_max is not None) and (size_property is not None): VG.resize_nodes(node_radius_min_max=node_radius_min_max) return VG diff --git a/python-wrapper/tests/test_gql_create.py b/python-wrapper/tests/test_gql_create.py index d8ad28bf..155282fb 100644 --- a/python-wrapper/tests/test_gql_create.py +++ b/python-wrapper/tests/test_gql_create.py @@ -5,7 +5,7 @@ from neo4j_viz.gql_create import from_gql_create -def test_from_gql_create() -> None: +def test_from_gql_create_syntax() -> None: query = """ CREATE (a:User {name: 'Alice', age: 23, labels: ['Happy'], "id": 42}), @@ -47,7 +47,7 @@ def test_from_gql_create() -> None: {"top_level": {}, "properties": {"name": "Fawad", "age": 78, "labels": ["Person", "User"]}}, ] - VG = from_gql_create(query) + VG = from_gql_create(query, node_caption=None, relationship_caption=None) assert len(VG.nodes) == len(expected_node_dicts) for i, exp_node in enumerate(expected_node_dicts): @@ -80,6 +80,81 @@ def test_from_gql_create() -> None: assert created_rel.properties == exp_rel["properties"] +def test_from_gql_create_captions() -> None: + query = """ + CREATE + (a:User {name: 'Alice', age: 23}), + (b:User:person {name: "Bridget", age: 34, "caption": "Bridget"}), + (a)-[:LINK {weight: 0.5}]->(b); + """ + expected_node_dicts: list[dict[str, dict[str, Any]]] = [ + { + "top_level": {"caption": "User"}, + "properties": {"name": "Alice", "age": 23, "labels": ["User"]}, + }, + { + "top_level": {"caption": "User:person"}, + "properties": {"name": "Bridget", "age": 34, "labels": ["User", "person"]}, + }, + ] + + VG = from_gql_create(query) + + assert len(VG.nodes) == len(expected_node_dicts) + for i, exp_node in enumerate(expected_node_dicts): + created_node = VG.nodes[i] + + assert created_node.model_dump(exclude_none=True, exclude={"properties", "id"}) == exp_node["top_level"] + assert created_node.properties == exp_node["properties"] + + 
expected_relationships_dicts: list[dict[str, Any]] = [ + { + "source_idx": 0, + "target_idx": 1, + "top_level": {"caption": "LINK"}, + "properties": {"weight": 0.5, "type": "LINK"}, + }, + ] + + assert len(VG.relationships) == len(expected_relationships_dicts) + for i, exp_rel in enumerate(expected_relationships_dicts): + created_rel = VG.relationships[i] + assert created_rel.source == VG.nodes[exp_rel["source_idx"]].id + assert created_rel.target == VG.nodes[exp_rel["target_idx"]].id + assert ( + created_rel.model_dump(exclude_none=True, exclude={"properties", "id", "source", "target"}) + == exp_rel["top_level"] + ) + assert created_rel.properties == exp_rel["properties"] + + +def test_from_gql_create_sizes() -> None: + query = """ + CREATE + (a:User {name: 'Alice', age: 23}), + (b:User:person {name: "Bridget", age: 34, "caption": "Bridget"}); + """ + expected_node_dicts: list[dict[str, dict[str, Any]]] = [ + { + "top_level": {"size": 3.0}, + "properties": {"name": "Alice", "age": 23, "labels": ["User"]}, + }, + { + "top_level": {"caption": "Bridget", "size": 60.0}, + "properties": {"name": "Bridget", "age": 34, "labels": ["User", "person"]}, + }, + ] + + VG = from_gql_create(query, size_property="age", node_caption=None, relationship_caption=None) + + assert len(VG.nodes) == len(expected_node_dicts) + for i, exp_node in enumerate(expected_node_dicts): + created_node = VG.nodes[i] + + assert created_node.model_dump(exclude_none=True, exclude={"properties", "id"}) == exp_node["top_level"] + assert created_node.properties == exp_node["properties"] + + def test_unbalanced_parentheses_snippet() -> None: query = "CREATE (a:User, (b:User })" with pytest.raises(ValueError, match=r"Unbalanced parentheses near: `.*\(b:User.*"): From 81b27884533a3ea86af707f9ce8d841d4ca9d30b Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Tue, 22 Apr 2025 16:09:14 +0200 Subject: [PATCH 3/7] Add more docs for `from_gql_create` --- docs/source/integration.rst | 64 
++++++++++++++++++++++++++++++++---- examples/neo4j-example.ipynb | 8 +++++ 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/docs/source/integration.rst b/docs/source/integration.rst index 31687303..be915422 100644 --- a/docs/source/integration.rst +++ b/docs/source/integration.rst @@ -4,8 +4,9 @@ Integration with other libraries In addition to creating graphs from scratch, with ``neo4j-viz`` as is shown in the :doc:`Getting started section <./getting-started>`, you can also import data directly from external sources. In this section we will cover how to import data from `Pandas DataFrames `_, -`Neo4j Graph Data Science `_ and -`Neo4j Database `_. +`Neo4j Graph Data Science `_, +`Neo4j Database `_ and +`GQL CREATE queries `_. .. contents:: On this page: @@ -159,7 +160,7 @@ more extensive example. Neo4j Database ---------------- +-------------- The ``neo4j-viz`` library provides a convenience method for importing data from Neo4j. It requires and additional dependency to be installed, which you can do by running: @@ -171,15 +172,15 @@ It requires and additional dependency to be installed, which you can do by runni Once you have installed the additional dependency, you can use the :doc:`from_neo4j <./api-reference/from_neo4j>` method to import query results from Neo4j. -The ``from_neo4j`` method takes one mandatory positional parameters: +The ``from_neo4j`` method takes one mandatory positional parameter: * A ``result`` representing the query result either in form of `neo4j.graph.Graph` or `neo4j.Result`. -The ``node_caption`` parameter is also optional, and indicates the node property to use for the caption of each node in the visualization. - We can also provide an optional ``size_property`` parameter, which should refer to a node property, and will be used to determine the sizes of the nodes in the visualization. 
+The ``node_caption`` and ``relationship_caption`` parameters are also optional, and indicate the node and relationship properties to use for the captions of each element in the visualization. + The last optional property, ``node_radius_min_max``, can be used (and is used by default) to scale the node sizes for the visualization. It is a tuple of two numbers, representing the radii (sizes) in pixels of the smallest and largest nodes respectively in @@ -218,3 +219,54 @@ In this small example, we import a graph from a Neo4j query result. Please see the :doc:`Visualizing Neo4j Graphs tutorial <./tutorials/neo4j-example>` for a more extensive example. + + +GQL ``CREATE`` query +-------------------- + +The ``neo4j-viz`` library provides a convenience method for creating visualization graphs from GQL ``CREATE`` queries via the :doc:`from_gql_create <./api-reference/from_gql_create>` method. + +The ``from_gql_create`` method takes one mandatory positional parameter: + +* A valid ``query`` representing a GQL ``CREATE`` query as a string. + +We can also provide an optional ``size_property`` parameter, which should refer to a node property, +and will be used to determine the sizes of the nodes in the visualization. + +The ``node_caption`` and ``relationship_caption`` parameters are also optional, and indicate the node and relationship properties to use for the captions of each element in the visualization. + +The last optional property, ``node_radius_min_max``, can be used (and is used by default) to scale the node sizes for +the visualization. +It is a tuple of two numbers, representing the radii (sizes) in pixels of the smallest and largest nodes respectively in +the visualization. +The node sizes will be scaled such that the smallest node will have the size of the first value, and the largest node +will have the size of the second value. +The other nodes will be scaled linearly between these two values according to their relative size.
+This can be useful if node sizes vary a lot, or are all very small or very big. + + +Example +~~~~~~~ + +In this small example, we create a visualization graph from a GQL ``CREATE`` query. + +.. code-block:: python + + from neo4j_viz.gql_create import from_gql_create + + query = """ + CREATE + (a:User {name: 'Alice', age: 23}), + (b:User {name: 'Bridget', age: 34}), + (c:User {name: 'Charles', age: 45}), + (d:User {name: 'Dana', age: 56}), + (e:User {name: 'Eve', age: 67}), + (f:User {name: 'Fawad', age: 78}), + + (a)-[:LINK {weight: 0.5}]->(b), + (a)-[:LINK {weight: 4}]->(c), + (e)-[:LINK {weight: 1.1}]->(d), + (e)-[:LINK {weight: -2}]->(f); + """ + + VG = from_gql_create(query) diff --git a/examples/neo4j-example.ipynb b/examples/neo4j-example.ipynb index 604a205b..3fb3d163 100644 --- a/examples/neo4j-example.ipynb +++ b/examples/neo4j-example.ipynb @@ -279,6 +279,14 @@ " )\n", " print(result.summary.counters)" ] + }, + { + "cell_type": "markdown", + "id": "daf2f685c2e13fb9", + "metadata": {}, + "source": [ + "**NOTE:** Since in this example we didn't already have a Neo4j DB populated with data, it would actually have been more convenient to use the serverless `from_gql_create` importer method to create our `VisualizationGraph`." 
+ ] } ], "metadata": { From cec307a51c2889aae99fc61fd734cfbde5823357 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 24 Apr 2025 14:16:55 +0200 Subject: [PATCH 4/7] Address some review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Florentin Dörre --- python-wrapper/src/neo4j_viz/gql_create.py | 168 +++++++++++---------- 1 file changed, 88 insertions(+), 80 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py index e1de281d..7752a24c 100644 --- a/python-wrapper/src/neo4j_viz/gql_create.py +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -10,7 +10,7 @@ def _parse_value(value_str: str) -> Any: if not value_str: return None - # Parse object + # Parse map if value_str.startswith("{") and value_str.endswith("}"): inner = value_str[1:-1].strip() result = {} @@ -43,6 +43,7 @@ def _parse_value(value_str: str) -> Any: k, v = segment.split(":", 1) k = k.strip().strip("'\"") result[k] = _parse_value(v) + return result # Parse list @@ -68,6 +69,7 @@ def _parse_value(value_str: str) -> Any: in_string = None if inner[start_idx:]: items.append(_parse_value(inner[start_idx:])) + return items # Parse boolean, float, int, or string @@ -81,12 +83,91 @@ def _parse_value(value_str: str) -> Any: return False if value_str.lower() == "null": return None + return value_str.strip("'\"") +def _parse_prop_str( + query: str, prop_str: str, prop_start: int, top_level_keys: set[str] +) -> tuple[dict[str, Any], dict[str, Any]]: + top_level: dict[str, Any] = {} + props: dict[str, Any] = {} + depth = 0 + in_string = None + start_idx = 0 + for i, ch in enumerate(prop_str): + if in_string is None: + if ch in ["'", '"']: + in_string = ch + elif ch in ["{", "["]: + depth += 1 + elif ch in ["}", "]"]: + depth -= 1 + elif ch == "," and depth == 0: + pair = prop_str[start_idx:i].strip() + if ":" not in pair: + snippet = _get_snippet(query, prop_start + 
start_idx) + raise ValueError(f"Property syntax error near: `{snippet}`.") + k, v = pair.split(":", 1) + k = k.strip().strip("'\"") + + if k in top_level_keys: + top_level[k] = _parse_value(v) + else: + props[k] = _parse_value(v) + + start_idx = i + 1 + else: + if ch == in_string: + in_string = None + + if prop_str[start_idx:]: + pair = prop_str[start_idx:].strip() + if ":" not in pair: + snippet = _get_snippet(query, prop_start + start_idx) + raise ValueError(f"Property syntax error near: `{snippet}`.") + k, v = pair.split(":", 1) + k = k.strip().strip("'\"") + + if k in top_level_keys: + top_level[k] = _parse_value(v) + else: + props[k] = _parse_value(v) + + return top_level, props + + +def _parse_labels_and_props( + query: str, s: str, top_level_keys: set[str] +) -> tuple[Optional[str], dict[str, Any], dict[str, Any]]: + prop_match = re.search(r"\{(.*)\}", s) + prop_str = "" + if prop_match: + prop_str = prop_match.group(1) + prop_start = query.index(prop_str, query.index(s)) + s = s[: prop_match.start()].strip() + alias_labels = re.split(r"[:&]", s) + raw_alias = alias_labels[0].strip() + final_alias = raw_alias if raw_alias else None + + if prop_str: + top_level, props = _parse_prop_str(query, prop_str, prop_start, top_level_keys) + else: + top_level = {} + props = {} + + label_list = [lbl.strip() for lbl in alias_labels[1:]] + if "labels" in props: + props["__labels"] = props["labels"] + props["labels"] = sorted(label_list) + + return final_alias, top_level, props + + def _get_snippet(q: str, idx: int, context: int = 15) -> str: start = max(0, idx - context) end = min(len(q), idx + context) + return q[start:end].replace("\n", " ") @@ -108,6 +189,8 @@ def from_gql_create( Please note that this function is not a full GQL parser, it only handles CREATE queries that do not contain other clauses like MATCH, WHERE, RETURN, etc, or any Cypher function calls. It also does not handle all possible GQL syntax, but it should work for most common cases. 
+ For more complex cases, we recommend using a Neo4j database and the `from_neo4j` method. + Parameters ---------- @@ -129,81 +212,6 @@ def from_gql_create( if not re.match(r"(?i)^create\b", query): raise ValueError("Query must begin with 'CREATE' (case insensitive).") - def parse_prop_str( - prop_str: str, prop_start: int, top_level_keys: set[str] - ) -> tuple[dict[str, Any], dict[str, Any]]: - top_level: dict[str, Any] = {} - props: dict[str, Any] = {} - depth = 0 - in_string = None - start_idx = 0 - for i, ch in enumerate(prop_str): - if in_string is None: - if ch in ["'", '"']: - in_string = ch - elif ch in ["{", "["]: - depth += 1 - elif ch in ["}", "]"]: - depth -= 1 - elif ch == "," and depth == 0: - pair = prop_str[start_idx:i].strip() - if ":" not in pair: - snippet = _get_snippet(query, prop_start + start_idx) - raise ValueError(f"Property syntax error near: `{snippet}`.") - k, v = pair.split(":", 1) - k = k.strip().strip("'\"") - - if k in top_level_keys: - top_level[k] = _parse_value(v) - else: - props[k] = _parse_value(v) - - start_idx = i + 1 - else: - if ch == in_string: - in_string = None - - if prop_str[start_idx:]: - pair = prop_str[start_idx:].strip() - if ":" not in pair: - snippet = _get_snippet(query, prop_start + start_idx) - raise ValueError(f"Property syntax error near: `{snippet}`.") - k, v = pair.split(":", 1) - k = k.strip().strip("'\"") - - if k in top_level_keys: - top_level[k] = _parse_value(v) - else: - props[k] = _parse_value(v) - - return top_level, props - - def parse_labels_and_props( - s: str, top_level_keys: set[str] - ) -> tuple[Optional[str], dict[str, Any], dict[str, Any]]: - prop_match = re.search(r"\{(.*)\}", s) - prop_str = "" - if prop_match: - prop_str = prop_match.group(1) - prop_start = query.index(prop_str, query.index(s)) - s = s[: prop_match.start()].strip() - alias_labels = re.split(r"[:&]", s) - raw_alias = alias_labels[0].strip() - final_alias = raw_alias if raw_alias else None - - if prop_str: - top_level, 
props = parse_prop_str(prop_str, prop_start, top_level_keys) - else: - top_level = {} - props = {} - - label_list = [lbl.strip() for lbl in alias_labels[1:]] - if "labels" in props: - props["__labels"] = props["labels"] - props["labels"] = sorted(label_list) - - return final_alias, top_level, props - nodes = [] relationships = [] alias_to_id = {} @@ -259,7 +267,7 @@ def parse_labels_and_props( node_m = node_pattern.match(part) if node_m: alias_labels_props = node_m.group(1).strip() - alias, top_level, props = parse_labels_and_props(alias_labels_props, node_top_level_keys) + alias, top_level, props = _parse_labels_and_props(query, alias_labels_props, node_top_level_keys) if not alias: alias = f"_anon_{anonymous_count}" anonymous_count += 1 @@ -273,12 +281,12 @@ def parse_labels_and_props( rel_type = rel_m.group(2).replace(":", "").strip() right_node = rel_m.group(4).strip() - left_alias, _, _ = parse_labels_and_props(left_node, empty_set) + left_alias, _, _ = _parse_labels_and_props(query, left_node, empty_set) if not left_alias or left_alias not in alias_to_id: snippet = _get_snippet(query, query.index(left_node)) raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.") - right_alias, _, _ = parse_labels_and_props(right_node, empty_set) + right_alias, _, _ = _parse_labels_and_props(query, right_node, empty_set) if not right_alias or right_alias not in alias_to_id: snippet = _get_snippet(query, query.index(right_node)) raise ValueError(f"Relationship references unknown node alias: '{right_alias}' near: `{snippet}`.") @@ -288,7 +296,7 @@ def parse_labels_and_props( if rel_props_str: inner_str = rel_props_str.strip("{}").strip() prop_start = query.index(inner_str, query.index(inner_str)) - top_level, props = parse_prop_str(inner_str, prop_start, rel_top_level_keys) + top_level, props = _parse_prop_str(query, inner_str, prop_start, rel_top_level_keys) else: top_level = {} props = {} From 01295fc017e9f059a5c646bce5adb616e863a65b 
Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 24 Apr 2025 14:30:10 +0200 Subject: [PATCH 5/7] Add additional parsing error test case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Florentin Dörre --- python-wrapper/src/neo4j_viz/gql_create.py | 3 +++ python-wrapper/tests/test_gql_create.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py index 7752a24c..fcff6f66 100644 --- a/python-wrapper/src/neo4j_viz/gql_create.py +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -36,6 +36,7 @@ def _parse_value(value_str: str) -> Any: else: if ch == in_string: in_string = None + if inner[start_idx:]: segment = inner[start_idx:].strip() if ":" not in segment: @@ -67,6 +68,7 @@ def _parse_value(value_str: str) -> Any: else: if ch == in_string: in_string = None + if inner[start_idx:]: items.append(_parse_value(inner[start_idx:])) @@ -242,6 +244,7 @@ def from_gql_create( current = [] else: current.append(char) + parts.append("".join(current).strip()) if paren_level != 0: snippet = _get_snippet(query, len(query) - 1) diff --git a/python-wrapper/tests/test_gql_create.py b/python-wrapper/tests/test_gql_create.py index 155282fb..e55317c9 100644 --- a/python-wrapper/tests/test_gql_create.py +++ b/python-wrapper/tests/test_gql_create.py @@ -173,6 +173,12 @@ def test_node_property_syntax_error_snippet1() -> None: from_gql_create(query) +def test_node_property_paren_imbalance() -> None: + query = "CREATE (a:User {a: [1, b: 2, c: (3, 4)})" + with pytest.raises(ValueError, match=r"Unbalanced square brackets near: `: 2, c: \(3, 4\)}"): + from_gql_create(query) + + def test_node_property_syntax_error_snippet2() -> None: query = "CREATE (a:User {x:5,, y:4})" with pytest.raises(ValueError, match=r"Property syntax error near: `.*x:5,, y.*"): From 4ef1c2fbd90482654cf1a00bef1b53ff1f03410e Mon Sep 17 00:00:00 2001 From: Adam 
Schill Collberg Date: Thu, 24 Apr 2025 14:50:08 +0200 Subject: [PATCH 6/7] Add support for label-less nodes to `from_gql_create` --- python-wrapper/src/neo4j_viz/gql_create.py | 10 +--------- python-wrapper/tests/test_gql_create.py | 6 +++++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py index fcff6f66..b8ab0cd4 100644 --- a/python-wrapper/src/neo4j_viz/gql_create.py +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -210,7 +210,6 @@ def from_gql_create( """ query = query.strip() - # Case-insensitive check that 'CREATE' is the first non-whitespace token if not re.match(r"(?i)^create\b", query): raise ValueError("Query must begin with 'CREATE' (case insensitive).") @@ -253,7 +252,7 @@ def from_gql_create( snippet = _get_snippet(query, len(query) - 1) raise ValueError(f"Unbalanced square brackets near: `{snippet}`.") - node_pattern = re.compile(r"^\(([^)]+)\)$") + node_pattern = re.compile(r"^\(([^)]*)\)$") # Changed here rel_pattern = re.compile(r"^\(([^)]+)\)-\s*\[\s*:(\w+)\s*(\{[^}]*\})?\s*\]->\(([^)]+)\)$") node_top_level_keys = set(Node.model_fields.keys()) @@ -283,12 +282,10 @@ def from_gql_create( left_node = rel_m.group(1).strip() rel_type = rel_m.group(2).replace(":", "").strip() right_node = rel_m.group(4).strip() - left_alias, _, _ = _parse_labels_and_props(query, left_node, empty_set) if not left_alias or left_alias not in alias_to_id: snippet = _get_snippet(query, query.index(left_node)) raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.") - right_alias, _, _ = _parse_labels_and_props(query, right_node, empty_set) if not right_alias or right_alias not in alias_to_id: snippet = _get_snippet(query, query.index(right_node)) @@ -303,11 +300,9 @@ def from_gql_create( else: top_level = {} props = {} - if "type" in props: props["__type"] = props["type"] props["type"] = rel_type - relationships.append( 
Relationship( id=rel_id, @@ -324,7 +319,6 @@ def from_gql_create( if size_property is not None: for node in nodes: node.size = node.properties.get(size_property) - if node_caption is not None: for node in nodes: if node_caption == "labels": @@ -332,7 +326,6 @@ def from_gql_create( node.caption = ":".join([label for label in node.properties["labels"]]) else: node.caption = str(node.properties.get(node_caption)) - if relationship_caption is not None: for rel in relationships: if relationship_caption == "type": @@ -341,7 +334,6 @@ def from_gql_create( rel.caption = str(rel.properties.get(relationship_caption)) VG = VisualizationGraph(nodes=nodes, relationships=relationships) - if (node_radius_min_max is not None) and (size_property is not None): VG.resize_nodes(node_radius_min_max=node_radius_min_max) diff --git a/python-wrapper/tests/test_gql_create.py b/python-wrapper/tests/test_gql_create.py index e55317c9..679b046c 100644 --- a/python-wrapper/tests/test_gql_create.py +++ b/python-wrapper/tests/test_gql_create.py @@ -15,8 +15,10 @@ def test_from_gql_create_syntax() -> None: (a)-[:LINK {weight: 0.5}]->(b), (e:User {age: 67, my_map: {key: 'value', key2: 3.14, key3: [1, 2, 3], key4: {a: 1, b: null}}}), (:User {age: 42, pets: ['cat', false, 'dog']}), + (), (f:User&Person {name: 'Fawad', age: 78}), + ({age: 29}), (a)-[:LINK {weight: 4}]->(wizardMan), (e)-[:LINK]->(d), (e)-[:OTHER_LINK {weight: -2, type: 1, source: 1337, caption: "Balloon"}]->(f); @@ -44,7 +46,9 @@ def test_from_gql_create_syntax() -> None: }, }, {"top_level": {}, "properties": {"age": 42, "pets": ["cat", False, "dog"], "labels": ["User"]}}, + {"top_level": {}, "properties": {"labels": []}}, {"top_level": {}, "properties": {"name": "Fawad", "age": 78, "labels": ["Person", "User"]}}, + {"top_level": {}, "properties": {"age": 29, "labels": []}}, ] VG = from_gql_create(query, node_caption=None, relationship_caption=None) @@ -62,7 +66,7 @@ def test_from_gql_create_syntax() -> None: {"source_idx": 4, 
"target_idx": 3, "top_level": {}, "properties": {"type": "LINK"}}, { "source_idx": 4, - "target_idx": 6, + "target_idx": 7, "top_level": {"caption": "Balloon"}, "properties": {"weight": -2, "type": "OTHER_LINK", "__type": 1, "source": 1337}, }, From d978c96ac7a608d68eb893bba72f19878a48e55d Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 24 Apr 2025 15:15:51 +0200 Subject: [PATCH 7/7] Allow node creation from rel expression in `from_gql_create` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Florentin Dörre --- python-wrapper/src/neo4j_viz/gql_create.py | 110 ++++++++++++--------- python-wrapper/tests/test_gql_create.py | 6 +- 2 files changed, 68 insertions(+), 48 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py index b8ab0cd4..83bcbf3e 100644 --- a/python-wrapper/src/neo4j_viz/gql_create.py +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -193,7 +193,6 @@ def from_gql_create( It also does not handle all possible GQL syntax, but it should work for most common cases. For more complex cases, we recommend using a Neo4j database and the `from_neo4j` method. 
- Parameters ---------- query : str @@ -213,11 +212,6 @@ def from_gql_create( if not re.match(r"(?i)^create\b", query): raise ValueError("Query must begin with 'CREATE' (case insensitive).") - nodes = [] - relationships = [] - alias_to_id = {} - anonymous_count = 0 - query = re.sub(r"(?i)^create\s*", "", query, count=1).rstrip(";").strip() parts = [] paren_level = 0 @@ -252,8 +246,8 @@ def from_gql_create( snippet = _get_snippet(query, len(query) - 1) raise ValueError(f"Unbalanced square brackets near: `{snippet}`.") - node_pattern = re.compile(r"^\(([^)]*)\)$") # Changed here - rel_pattern = re.compile(r"^\(([^)]+)\)-\s*\[\s*:(\w+)\s*(\{[^}]*\})?\s*\]->\(([^)]+)\)$") + node_pattern = re.compile(r"^\(([^)]*)\)$") + rel_pattern = re.compile(r"^\(([^)]*)\)-\s*\[\s*:(\w+)\s*(\{[^}]*\})?\s*\]->\(([^)]*)\)$") node_top_level_keys = set(Node.model_fields.keys()) node_top_level_keys.remove("id") @@ -263,7 +257,10 @@ def from_gql_create( rel_top_level_keys.remove("source") rel_top_level_keys.remove("target") - empty_set: set[str] = set() + nodes = [] + relationships = [] + alias_to_id = {} + anonymous_count = 0 for part in parts: node_m = node_pattern.match(part) @@ -276,45 +273,64 @@ def from_gql_create( if alias not in alias_to_id: alias_to_id[alias] = str(uuid.uuid4()) nodes.append(Node(id=alias_to_id[alias], **top_level, properties=props)) - else: - rel_m = rel_pattern.match(part) - if rel_m: - left_node = rel_m.group(1).strip() - rel_type = rel_m.group(2).replace(":", "").strip() - right_node = rel_m.group(4).strip() - left_alias, _, _ = _parse_labels_and_props(query, left_node, empty_set) - if not left_alias or left_alias not in alias_to_id: - snippet = _get_snippet(query, query.index(left_node)) - raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.") - right_alias, _, _ = _parse_labels_and_props(query, right_node, empty_set) - if not right_alias or right_alias not in alias_to_id: - snippet = _get_snippet(query, 
query.index(right_node)) - raise ValueError(f"Relationship references unknown node alias: '{right_alias}' near: `{snippet}`.") - - rel_id = str(uuid.uuid4()) - rel_props_str = rel_m.group(3) or "" - if rel_props_str: - inner_str = rel_props_str.strip("{}").strip() - prop_start = query.index(inner_str, query.index(inner_str)) - top_level, props = _parse_prop_str(query, inner_str, prop_start, rel_top_level_keys) - else: - top_level = {} - props = {} - if "type" in props: - props["__type"] = props["type"] - props["type"] = rel_type - relationships.append( - Relationship( - id=rel_id, - source=alias_to_id[left_alias], - target=alias_to_id[right_alias], - **top_level, - properties=props, - ) - ) + + continue + + rel_m = rel_pattern.match(part) + if rel_m: + left_node = rel_m.group(1).strip() + right_node = rel_m.group(4).strip() + + # Parse left node pattern + left_alias, left_top_level, left_props = _parse_labels_and_props(query, left_node, node_top_level_keys) + if not left_alias: + left_alias = f"_anon_{anonymous_count}" + anonymous_count += 1 + if left_alias not in alias_to_id: + alias_to_id[left_alias] = str(uuid.uuid4()) + nodes.append(Node(id=alias_to_id[left_alias], **left_top_level, properties=left_props)) + elif left_alias not in alias_to_id: + snippet = _get_snippet(query, query.index(left_node)) + raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.") + + # Parse right node pattern + right_alias, right_top_level, right_props = _parse_labels_and_props(query, right_node, node_top_level_keys) + if not right_alias: + right_alias = f"_anon_{anonymous_count}" + anonymous_count += 1 + if right_alias not in alias_to_id: + alias_to_id[right_alias] = str(uuid.uuid4()) + nodes.append(Node(id=alias_to_id[right_alias], **right_top_level, properties=right_props)) + elif right_alias not in alias_to_id: + snippet = _get_snippet(query, query.index(right_node)) + raise ValueError(f"Relationship references unknown node alias: 
'{right_alias}' near: `{snippet}`.") + + rel_id = str(uuid.uuid4()) + rel_type = rel_m.group(2).replace(":", "").strip() + rel_props_str = rel_m.group(3) or "" + if rel_props_str: + inner_str = rel_props_str.strip("{}").strip() + prop_start = query.index(inner_str, query.index(inner_str)) + top_level, props = _parse_prop_str(query, inner_str, prop_start, rel_top_level_keys) else: - snippet = part[:30] - raise ValueError(f"Invalid element in CREATE near: `{snippet}`.") + top_level = {} + props = {} + if "type" in props: + props["__type"] = props["type"] + props["type"] = rel_type + relationships.append( + Relationship( + id=rel_id, + source=alias_to_id[left_alias], + target=alias_to_id[right_alias], + **top_level, + properties=props, + ) + ) + continue + + snippet = part[:30] + raise ValueError(f"Invalid element in CREATE near: `{snippet}`.") if size_property is not None: for node in nodes: diff --git a/python-wrapper/tests/test_gql_create.py b/python-wrapper/tests/test_gql_create.py index 679b046c..75b58c04 100644 --- a/python-wrapper/tests/test_gql_create.py +++ b/python-wrapper/tests/test_gql_create.py @@ -21,7 +21,8 @@ def test_from_gql_create_syntax() -> None: ({age: 29}), (a)-[:LINK {weight: 4}]->(wizardMan), (e)-[:LINK]->(d), - (e)-[:OTHER_LINK {weight: -2, type: 1, source: 1337, caption: "Balloon"}]->(f); + (e)-[:OTHER_LINK {weight: -2, type: 1, source: 1337, caption: "Balloon"}]->(f), + ()-[:LINK]->({name: 'Florentin'}); """ expected_node_dicts: list[dict[str, dict[str, Any]]] = [ { @@ -49,6 +50,8 @@ def test_from_gql_create_syntax() -> None: {"top_level": {}, "properties": {"labels": []}}, {"top_level": {}, "properties": {"name": "Fawad", "age": 78, "labels": ["Person", "User"]}}, {"top_level": {}, "properties": {"age": 29, "labels": []}}, + {"top_level": {}, "properties": {"labels": []}}, + {"top_level": {}, "properties": {"name": "Florentin", "labels": []}}, ] VG = from_gql_create(query, node_caption=None, relationship_caption=None) @@ -70,6 +73,7 @@ def 
test_from_gql_create_syntax() -> None: "top_level": {"caption": "Balloon"}, "properties": {"weight": -2, "type": "OTHER_LINK", "__type": 1, "source": 1337}, }, + {"source_idx": 9, "target_idx": 10, "top_level": {}, "properties": {"type": "LINK"}}, ] assert len(VG.relationships) == len(expected_relationships_dicts)