diff --git a/changelog.md b/changelog.md index e35e198c..46cd6c91 100644 --- a/changelog.md +++ b/changelog.md @@ -13,5 +13,7 @@ ## Improvements +* Improved error messages when constructing `VisualizationGraph`s using `from_dfs`, `from_neo4j`, `from_gds` and `from_gql_create` methods + ## Other changes diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index 7be4151d..911d648f 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -97,4 +97,13 @@ def from_gds( rel_df = _rel_df(gds, G) rel_df.rename(columns={"sourceNodeId": "source", "targetNodeId": "target"}, inplace=True) - return _from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}) + try: + return _from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}) + except ValueError as e: + err_msg = str(e) + if "column" in err_msg: + err_msg = err_msg.replace("column", "property") + if ("'size'" in err_msg) and (size_property is not None): + err_msg = err_msg.replace("'size'", f"'{size_property}'") + raise ValueError(err_msg) + raise e diff --git a/python-wrapper/src/neo4j_viz/gql_create.py b/python-wrapper/src/neo4j_viz/gql_create.py index e0cca63a..e5c52965 100644 --- a/python-wrapper/src/neo4j_viz/gql_create.py +++ b/python-wrapper/src/neo4j_viz/gql_create.py @@ -2,6 +2,8 @@ import uuid from typing import Any, Optional +from pydantic import BaseModel, ValidationError + from neo4j_viz import Node, Relationship, VisualizationGraph @@ -252,6 +254,20 @@ def from_gql_create( node_top_level_keys = Node.all_validation_aliases(exempted_fields=["id"]) rel_top_level_keys = Relationship.all_validation_aliases(exempted_fields=["id", "source", "target"]) + def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) -> None: + for err in e.errors(): + loc = err["loc"][0] + if (loc == "size") and size_property is not None: + loc = size_property + if loc == "caption": + if (entity_type == Node) and (node_caption is not None): + loc = node_caption + elif (entity_type == Relationship) and (relationship_caption is not None): + loc = relationship_caption + raise ValueError( + f"Error for {entity_type.__name__.lower()} property '{loc}' with provided input '{err['input']}'. Reason: {err['msg']}" + ) + nodes = [] relationships = [] alias_to_id = {} @@ -267,7 +283,10 @@ def from_gql_create( anonymous_count += 1 if alias not in alias_to_id: alias_to_id[alias] = str(uuid.uuid4()) - nodes.append(Node(id=alias_to_id[alias], **top_level, properties=props)) + try: + nodes.append(Node(id=alias_to_id[alias], **top_level, properties=props)) + except ValidationError as e: + _parse_validation_error(e, Node) continue @@ -283,7 +302,10 @@ def from_gql_create( anonymous_count += 1 if left_alias not in alias_to_id: alias_to_id[left_alias] = str(uuid.uuid4()) - nodes.append(Node(id=alias_to_id[left_alias], **left_top_level, properties=left_props)) + try: + nodes.append(Node(id=alias_to_id[left_alias], **left_top_level, properties=left_props)) + except ValidationError as e: + _parse_validation_error(e, Node) elif left_alias not in alias_to_id: snippet = _get_snippet(query, query.index(left_node)) raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.") @@ -295,7 +317,10 @@ def from_gql_create( anonymous_count += 1 if right_alias not in alias_to_id: alias_to_id[right_alias] = str(uuid.uuid4()) - nodes.append(Node(id=alias_to_id[right_alias], **right_top_level, properties=right_props)) + try: + nodes.append(Node(id=alias_to_id[right_alias], **right_top_level, properties=right_props)) + except ValidationError as e: + _parse_validation_error(e, Node) elif right_alias not in alias_to_id: snippet = _get_snippet(query, query.index(right_node)) raise ValueError(f"Relationship references unknown node alias: '{right_alias}' near: `{snippet}`.") @@ -313,15 +338,20 @@ def from_gql_create( if "type" in props: props["__type"] = props["type"] props["type"] = rel_type - relationships.append( - Relationship( - id=rel_id, - source=alias_to_id[left_alias], - target=alias_to_id[right_alias], - **top_level, - properties=props, + + try: + relationships.append( + Relationship( + id=rel_id, + source=alias_to_id[left_alias], + target=alias_to_id[right_alias], + **top_level, + properties=props, + ) ) - ) + except ValidationError as e: + _parse_validation_error(e, Relationship) + continue snippet = part[:30] @@ -346,6 +376,10 @@ def from_gql_create( VG = VisualizationGraph(nodes=nodes, relationships=relationships) if (node_radius_min_max is not None) and (size_property is not None): - VG.resize_nodes(node_radius_min_max=node_radius_min_max) + try: + VG.resize_nodes(node_radius_min_max=node_radius_min_max) + except TypeError: + loc = "size" if size_property is None else size_property + raise ValueError(f"Error for node property '{loc}'. Reason: must be a numerical value") return VG diff --git a/python-wrapper/src/neo4j_viz/neo4j.py b/python-wrapper/src/neo4j_viz/neo4j.py index 8973acaf..d5eeb408 100644 --- a/python-wrapper/src/neo4j_viz/neo4j.py +++ b/python-wrapper/src/neo4j_viz/neo4j.py @@ -4,12 +4,21 @@ import neo4j.graph from neo4j import Result +from pydantic import BaseModel, ValidationError from neo4j_viz.node import Node from neo4j_viz.relationship import Relationship from neo4j_viz.visualization_graph import VisualizationGraph +def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) -> None: + for err in e.errors(): + loc = err["loc"][0] + raise ValueError( + f"Error for {entity_type.__name__.lower()} property '{loc}' with provided input '{err['input']}'. Reason: {err['msg']}" + ) + + def from_neo4j( result: Union[neo4j.graph.Graph, Result], size_property: Optional[str] = None, @@ -50,14 +59,30 @@ def from_neo4j( all_node_field_aliases = Node.all_validation_aliases() all_rel_field_aliases = Relationship.all_validation_aliases() - nodes = [ - _map_node(node, all_node_field_aliases, size_property, caption_property=node_caption) for node in graph.nodes - ] + try: + nodes = [ + _map_node(node, all_node_field_aliases, size_property, caption_property=node_caption) + for node in graph.nodes + ] + except ValueError as e: + err_msg = str(e) + if ("'size'" in err_msg) and (size_property is not None): + err_msg = err_msg.replace("'size'", f"'{size_property}'") + elif ("'caption'" in err_msg) and (node_caption is not None): + err_msg = err_msg.replace("'caption'", f"'{node_caption}'") + raise ValueError(err_msg) + relationships = [] - for rel in graph.relationships: - mapped_rel = _map_relationship(rel, all_rel_field_aliases, caption_property=relationship_caption) - if mapped_rel: - relationships.append(mapped_rel) + try: + for rel in graph.relationships: + mapped_rel = _map_relationship(rel, all_rel_field_aliases, caption_property=relationship_caption) + if mapped_rel: + relationships.append(mapped_rel) + except ValueError as e: + err_msg = str(e) + if ("'caption'" in err_msg) and (relationship_caption is not None): + err_msg = err_msg.replace("'caption'", f"'{relationship_caption}'") + raise ValueError(err_msg) VG = VisualizationGraph(nodes, relationships) @@ -102,7 +127,12 @@ def _map_node( properties["__labels"] = properties["labels"] properties["labels"] = labels - return Node(**top_level_fields, properties=properties) + try: + viz_node = Node(**top_level_fields, properties=properties) + except ValidationError as e: + _parse_validation_error(e, Node) + + return viz_node def _map_relationship( @@ -135,4 +165,9 @@ def _map_relationship( properties["__type"] = properties["type"] properties["type"] = rel.type - return Relationship(**top_level_fields, properties=properties) + try: + viz_rel = Relationship(**top_level_fields, properties=properties) + except ValidationError as e: + _parse_validation_error(e, Relationship) + + return viz_rel diff --git a/python-wrapper/src/neo4j_viz/pandas.py b/python-wrapper/src/neo4j_viz/pandas.py index 694ebf6e..15e29c0e 100644 --- a/python-wrapper/src/neo4j_viz/pandas.py +++ b/python-wrapper/src/neo4j_viz/pandas.py @@ -4,6 +4,7 @@ from typing import Optional, Union from pandas import DataFrame +from pydantic import BaseModel, ValidationError from .node import Node from .relationship import Relationship @@ -12,6 +13,19 @@ DFS_TYPE = Union[DataFrame, Iterable[DataFrame]] +def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) -> None: + for err in e.errors(): + loc = err["loc"][0] + if err["type"] == "missing": + raise ValueError( + f"Mandatory {entity_type.__name__.lower()} column '{loc}' is missing. Expected one of {entity_type.model_fields[loc].validation_alias.choices} to be present" # type: ignore + ) + else: + raise ValueError( + f"Error for {entity_type.__name__.lower()} column '{loc}' with provided input '{err['input']}'. Reason: {err['msg']}" + ) + + def _from_dfs( node_dfs: Optional[DFS_TYPE], rel_dfs: DFS_TYPE, @@ -63,7 +77,11 @@ def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]] key = rename_properties[key] properties[key] = value - nodes.append(Node(**top_level, properties=properties)) + try: + nodes.append(Node(**top_level, properties=properties)) + except ValidationError as e: + _parse_validation_error(e, Node) + return nodes, has_size @@ -88,7 +106,11 @@ def _parse_relationships(rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str key = rename_properties[key] properties[key] = value - relationships.append(Relationship(**top_level, properties=properties)) + try: + relationships.append(Relationship(**top_level, properties=properties)) + except ValidationError as e: + _parse_validation_error(e, Relationship) + return relationships diff --git a/python-wrapper/tests/test_gds.py b/python-wrapper/tests/test_gds.py index bad5e4e1..a5243459 100644 --- a/python-wrapper/tests/test_gds.py +++ b/python-wrapper/tests/test_gds.py @@ -132,3 +132,50 @@ def test_from_gds_mocked(mocker: MockerFixture) -> None: (1, 2, "REL2"), (2, 0, "REL"), ] + + +@pytest.mark.requires_neo4j_and_gds +def test_from_gds_node_errors(gds: Any) -> None: + from neo4j_viz.gds import from_gds + + nodes = pd.DataFrame( + { + "nodeId": [0, 1, 2], + "labels": [["A"], ["C"], ["A", "B"]], + "component": [1, 4, 2], + "score": [1337, -42, 3.14], + "size": [-0.1, 0.2, 0.3], + } + ) + rels = pd.DataFrame( + { + "sourceNodeId": [0, 1, 2], + "targetNodeId": [1, 2, 0], + "relationshipType": ["REL", "REL2", "REL"], + } + ) + + with gds.graph.construct("flo", nodes, rels) as G: + with pytest.raises( + ValueError, + match=r"Error for node property 'size' with provided input '-0.1'. Reason: Input should be greater than or equal to 0", + ): + from_gds( + gds, + G, + additional_node_properties=["component", "size"], + node_radius_min_max=None, + ) + + with gds.graph.construct("flo", nodes, rels) as G: + with pytest.raises( + ValueError, + match=r"Error for node property 'score' with provided input '-42.0'. Reason: Input should be greater than or equal to 0", + ): + from_gds( + gds, + G, + size_property="score", + additional_node_properties=["component", "size"], + node_radius_min_max=None, + ) diff --git a/python-wrapper/tests/test_gql_create.py b/python-wrapper/tests/test_gql_create.py index 75b58c04..e18f74d3 100644 --- a/python-wrapper/tests/test_gql_create.py +++ b/python-wrapper/tests/test_gql_create.py @@ -217,3 +217,30 @@ def test_no_create_keyword() -> None: query = "(a:User {y:4})" with pytest.raises(ValueError, match=r"Query must begin with 'CREATE' \(case insensitive\)."): from_gql_create(query) + + +def test_illegal_node_x() -> None: + query = "CREATE (a:User {x:'tennis'})" + with pytest.raises( + ValueError, + match="Error for node property 'x' with provided input 'tennis'. Reason: Input should be a valid integer, unable to parse string as an integer", + ): + from_gql_create(query) + + +def test_illegal_node_size() -> None: + query = "CREATE (a:User {hello: 'tennis'})" + with pytest.raises( + ValueError, + match="Error for node property 'hello'. Reason: must be a numerical value", + ): + from_gql_create(query, size_property="hello") + + +def test_illegal_rel_caption_size() -> None: + query = "CREATE ()-[:LINK {caption_size: -42}]->()" + with pytest.raises( + ValueError, + match="Error for relationship property 'caption_size' with provided input '-42'. Reason: Input should be greater than 0", + ): + from_gql_create(query) diff --git a/python-wrapper/tests/test_neo4j.py b/python-wrapper/tests/test_neo4j.py index 7182087c..779e36c9 100644 --- a/python-wrapper/tests/test_neo4j.py +++ b/python-wrapper/tests/test_neo4j.py @@ -161,3 +161,43 @@ def test_from_neo4j_graph_full(neo4j_session: Session) -> None: (node_ids[1], node_ids[0], "2015"), (node_ids[0], node_ids[1], "2025"), ] + + +@pytest.mark.requires_neo4j_and_gds +def test_from_neo4j_node_error(neo4j_session: Session) -> None: + neo4j_session.run("MATCH (n:_CI_A|_CI_B) DETACH DELETE n") + neo4j_session.run( + "CREATE (a:_CI_A {name:'Alice', height:20, id:42, _id: 1337, caption: 'hello', caption_size: -5})" + ) + graph = neo4j_session.run("MATCH (a:_CI_A) RETURN a").graph() + + with pytest.raises( + ValueError, + match="Error for node property 'caption_size' with provided input '-5'. Reason: Input should be greater than or equal to 1", + ): + from_neo4j(graph) + + neo4j_session.run("MATCH (n:_CI_A|_CI_B) DETACH DELETE n") + neo4j_session.run("CREATE (a:_CI_A {name:'Alice', height:20, id:42, _id: 1337, hello: -5})") + graph = neo4j_session.run("MATCH (a:_CI_A) RETURN a").graph() + with pytest.raises( + ValueError, + match="Error for node property 'hello' with provided input '-5'. Reason: Input should be greater than or equal to 0", + ): + from_neo4j(graph, size_property="hello") + + +@pytest.mark.requires_neo4j_and_gds +def test_from_neo4j_rel_error(neo4j_session: Session) -> None: + neo4j_session.run("MATCH (n:_CI_A|_CI_B) DETACH DELETE n") + neo4j_session.run( + "CREATE (a:_CI_A {name:'Alice', height:20, id:42, _id: 1337, caption: 'hello'})-[:KNOWS {year: 2025, id: 41, source: 1, target: 2, caption_align: 'banana'}]->" + "(b:_CI_A:_CI_B {name:'Bob', height:10, id: 84, size: 11, labels: [1,2]})" + ) + graph = neo4j_session.run("MATCH (a:_CI_A|_CI_B)-[r]->(b) RETURN a, b, r ORDER BY a").graph() + + with pytest.raises( + ValueError, + match="Error for relationship property 'caption_align' with provided input 'banana'. Reason: Input should be 'top', 'center' or 'bottom'", + ): + from_neo4j(graph) diff --git a/python-wrapper/tests/test_pandas.py b/python-wrapper/tests/test_pandas.py index 76fb77f6..7da45bf1 100644 --- a/python-wrapper/tests/test_pandas.py +++ b/python-wrapper/tests/test_pandas.py @@ -1,3 +1,4 @@ +import pytest from pandas import DataFrame from pydantic_extra_types.color import Color @@ -130,3 +131,62 @@ def test_from_dfs() -> None: assert VG.relationships[1].source == 1 assert VG.relationships[1].target == 0 assert VG.relationships[1].caption == "REL2" + + +def test_node_errors() -> None: + nodes = DataFrame( + {"caption": ["A", "B"], "size": [1337, 42], "color": "#FF0000", "instrument": ["piano", "guitar"]} + ) + with pytest.raises( + ValueError, + match=r"Mandatory node column 'id' is missing. Expected one of \['id', 'ID', 'id', 'nodeid', 'NODEID', 'nodeid', 'node_id', 'NODE_ID', 'nodeId'\] to be present", + ): + from_dfs(nodes, []) + + nodes = DataFrame( + { + "id": [0, 1], + "caption": ["A", "B"], + "size": ["aaa", 42], + "color": "#FF0000", + "instrument": ["piano", "guitar"], + } + ) + with pytest.raises( + ValueError, + match=r"Error for node column 'size' with provided input 'aaa'. Reason: Input should be a valid integer, unable to parse string as an integer", + ): + from_dfs(nodes, []) + + +def test_rel_errors() -> None: + nodes = DataFrame( + {"id": [0, 1], "caption": ["A", "B"], "size": [1337, 42], "color": "#FF0000", "instrument": ["piano", "guitar"]} + ) + relationships = DataFrame( + { + "target": [1, 0], + "caption": ["REL", "REL2"], + "weight": [1.0, 2.0], + } + ) + with pytest.raises( + ValueError, + match=r"Mandatory relationship column 'source' is missing. Expected one of \['source', 'SOURCE', 'source', 'sourcenodeid', 'SOURCENODEID', 'sourcenodeid', 'source_node_id', 'SOURCE_NODE_ID', 'sourceNodeId', 'from', 'FROM', 'from'\] to be present", + ): + from_dfs(nodes, relationships) + + relationships = DataFrame( + { + "source": [0, 1], + "target": [1, 0], + "caption": ["REL", "REL2"], + "caption_size": [1.0, -300], + "weight": [1.0, 2.0], + } + ) + with pytest.raises( + ValueError, + match=r"Error for relationship column 'caption_size' with provided input '-300.0'. Reason: Input should be greater than 0", + ): + from_dfs(nodes, relationships)