From 976337165249f134f430ffc5015c3de908f3d990 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Fri, 11 Apr 2025 16:52:21 +0200 Subject: [PATCH 01/10] Start moving custom fields into properties dict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only `Node`, `Relationship` and `from_neo4j` updated so far. Co-Authored-By: Florentin Dörre --- python-wrapper/src/neo4j_viz/neo4j.py | 90 +++++++++---------- python-wrapper/src/neo4j_viz/node.py | 2 + python-wrapper/src/neo4j_viz/relationship.py | 2 + python-wrapper/tests/test_neo4j.py | 92 +++++++++++--------- python-wrapper/tests/test_node.py | 5 ++ python-wrapper/tests/test_relationship.py | 11 +-- 6 files changed, 109 insertions(+), 93 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/neo4j.py b/python-wrapper/src/neo4j_viz/neo4j.py index 72c350ea..5092a995 100644 --- a/python-wrapper/src/neo4j_viz/neo4j.py +++ b/python-wrapper/src/neo4j_viz/neo4j.py @@ -1,7 +1,6 @@ from __future__ import annotations -from collections.abc import Iterable -from typing import Any, Optional, Union +from typing import Optional, Union import neo4j.graph from neo4j import Result @@ -22,8 +21,8 @@ def from_neo4j( Create a VisualizationGraph from a Neo4j Graph or Neo4j Result object. All node and relationship properties will be included in the visualization graph. - If the property names are conflicting with those of `Node` and `Relationship` objects, they will be prefixed - with `__`. + If the properties are named as the fields of the `Node` or `Relationship` classes, they will be included as + top level fields of the respective objects. Otherwise, they will be included in the `properties` dictionary. 
Parameters ---------- @@ -63,69 +62,64 @@ def from_neo4j( def _map_node(node: neo4j.graph.Node, size_property: Optional[str], caption_property: Optional[str]) -> Node: - labels = sorted([label for label in node.labels]) + top_level_fields = {"id": node.element_id} if size_property: - size = node.get(size_property) - else: - size = None + top_level_fields["size"] = node.get(size_property) + labels = sorted([label for label in node.labels]) if caption_property: if caption_property == "labels": if len(labels) > 0: - caption = ":".join([label for label in labels]) - else: - caption = None + top_level_fields["caption"] = ":".join([label for label in labels]) else: - caption = str(node.get(caption_property)) + top_level_fields["caption"] = str(node.get(caption_property)) + + properties = {} + for prop, value in node.items(): + if prop not in Node.model_fields.keys(): + properties[prop] = value + continue + + if prop in top_level_fields: + properties[prop] = value + continue - base_node_props = dict(id=node.element_id, caption=caption, labels=labels, size=size) + top_level_fields[prop] = value - protected_props = base_node_props.keys() - additional_node_props = {k: v for k, v in node.items()} - additional_node_props = _rename_protected_props(additional_node_props, protected_props) + if "labels" in properties: + properties["__labels"] = properties["labels"] + properties["labels"] = labels - return Node(**base_node_props, **additional_node_props) + return Node(**top_level_fields, properties=properties) def _map_relationship(rel: neo4j.graph.Relationship, caption_property: Optional[str]) -> Optional[Relationship]: if rel.start_node is None or rel.end_node is None: return None + top_level_fields = {"id": rel.element_id, "source": rel.start_node.element_id, "target": rel.end_node.element_id} + if caption_property: if caption_property == "type": - caption = rel.type + top_level_fields["caption"] = rel.type else: - caption = str(rel.get(caption_property)) - else: - caption = 
None - - base_rel_props = dict( - id=rel.element_id, - source=rel.start_node.element_id, - target=rel.end_node.element_id, - _type=rel.type, - caption=caption, - ) - - protected_props = base_rel_props.keys() - additional_rel_props = {k: v for k, v in rel.items()} - additional_rel_props = _rename_protected_props(additional_rel_props, protected_props) - - return Relationship( - **base_rel_props, - **additional_rel_props, - ) - - -def _rename_protected_props( - additional_props: dict[str, Any], - protected_props: Iterable[str], -) -> dict[str, Union[str, int, float]]: - for prop in protected_props: - if prop not in additional_props: + top_level_fields["caption"] = str(rel.get(caption_property)) + + properties = {} + for prop, value in rel.items(): + if prop not in Relationship.model_fields.keys(): + properties[prop] = value continue - additional_props[f"__{prop}"] = additional_props.pop(prop) + if prop in top_level_fields: + properties[prop] = value + continue + + top_level_fields[prop] = value + + if "type" in properties: + properties["__type"] = properties["type"] + properties["type"] = rel.type - return additional_props + return Relationship(**top_level_fields, properties=properties) diff --git a/python-wrapper/src/neo4j_viz/node.py b/python-wrapper/src/neo4j_viz/node.py index 8718a497..52eced11 100644 --- a/python-wrapper/src/neo4j_viz/node.py +++ b/python-wrapper/src/neo4j_viz/node.py @@ -45,6 +45,8 @@ class Node(BaseModel, extra="allow"): x: Optional[RealNumber] = Field(None, description="The x-coordinate of the node") #: The y-coordinate of the node y: Optional[RealNumber] = Field(None, description="The y-coordinate of the node") + #: The properties of the node + properties: dict[str, Any] = Field(default_factory=dict, description="The properties of the node") @field_serializer("color") def serialize_color(self, color: Color) -> str: diff --git a/python-wrapper/src/neo4j_viz/relationship.py b/python-wrapper/src/neo4j_viz/relationship.py index b5f4c640..10e938ec 
100644 --- a/python-wrapper/src/neo4j_viz/relationship.py +++ b/python-wrapper/src/neo4j_viz/relationship.py @@ -43,6 +43,8 @@ class Relationship(BaseModel, extra="allow"): ) #: The color of the relationship. Allowed input is for example "#FF0000", "red" or (255, 0, 0) color: Optional[ColorType] = Field(None, description="The color of the relationship") + #: The properties of the relationship + properties: dict[str, Any] = Field(default_factory=dict, description="The properties of the relationship") @field_serializer("color") def serialize_color(self, color: Color) -> str: diff --git a/python-wrapper/tests/test_neo4j.py b/python-wrapper/tests/test_neo4j.py index 9c80766d..7182087c 100644 --- a/python-wrapper/tests/test_neo4j.py +++ b/python-wrapper/tests/test_neo4j.py @@ -19,7 +19,7 @@ def graph_setup(neo4j_session: Session) -> Generator[None, None, None]: @pytest.mark.requires_neo4j_and_gds -def test_from_neo4j_graph(neo4j_session: Session) -> None: +def test_from_neo4j_graph_basic(neo4j_session: Session) -> None: graph = neo4j_session.run("MATCH (a:_CI_A|_CI_B)-[r]->(b) RETURN a, b, r ORDER BY a").graph() VG = from_neo4j(graph) @@ -31,27 +31,31 @@ def test_from_neo4j_graph(neo4j_session: Session) -> None: Node( id=node_ids[0], caption="_CI_A", - labels=["_CI_A"], - name="Alice", - height=20, - __id=42, - _id=1337, - __caption="hello", + properties=dict( + labels=["_CI_A"], + name="Alice", + height=20, + id=42, + _id=1337, + caption="hello", + ), ), Node( id=node_ids[1], caption="_CI_A:_CI_B", - labels=["_CI_A", "_CI_B"], - name="Bob", - height=10, - __id=84, - __size=11, - __labels=[1, 2], + size=11, + properties=dict( + labels=["_CI_A", "_CI_B"], + name="Bob", + height=10, + id=84, + __labels=[1, 2], + ), ), ] assert len(VG.nodes) == 2 - assert sorted(VG.nodes, key=lambda x: x.name) == expected_nodes # type: ignore[attr-defined] + assert sorted(VG.nodes, key=lambda x: x.properties["name"]) == expected_nodes assert len(VG.relationships) == 2 vg_rels = 
sorted([(e.source, e.target, e.caption) for e in VG.relationships], key=lambda x: x[2] if x[2] else "foo") @@ -76,27 +80,31 @@ def test_from_neo4j_result(neo4j_session: Session) -> None: Node( id=node_ids[0], caption="_CI_A", - labels=["_CI_A"], - name="Alice", - height=20, - __id=42, - _id=1337, - __caption="hello", + properties=dict( + labels=["_CI_A"], + name="Alice", + height=20, + id=42, + _id=1337, + caption="hello", + ), ), Node( id=node_ids[1], caption="_CI_A:_CI_B", - labels=["_CI_A", "_CI_B"], - name="Bob", - height=10, - __id=84, - __size=11, - __labels=[1, 2], + size=11, + properties=dict( + labels=["_CI_A", "_CI_B"], + name="Bob", + height=10, + id=84, + __labels=[1, 2], + ), ), ] assert len(VG.nodes) == 2 - assert sorted(VG.nodes, key=lambda x: x.name) == expected_nodes # type: ignore[attr-defined] + assert sorted(VG.nodes, key=lambda x: x.properties["name"]) == expected_nodes assert len(VG.relationships) == 2 vg_rels = sorted([(e.source, e.target, e.caption) for e in VG.relationships], key=lambda x: x[2] if x[2] else "foo") @@ -119,29 +127,33 @@ def test_from_neo4j_graph_full(neo4j_session: Session) -> None: Node( id=node_ids[0], caption="Alice", - labels=["_CI_A"], - name="Alice", - height=20, size=60.0, - __id=42, - _id=1337, - __caption="hello", + properties=dict( + labels=["_CI_A"], + name="Alice", + height=20, + id=42, + _id=1337, + caption="hello", + ), ), Node( id=node_ids[1], caption="Bob", - labels=["_CI_A", "_CI_B"], - name="Bob", - height=10, size=3.0, - __id=84, - __size=11, - __labels=[1, 2], + properties=dict( + labels=["_CI_A", "_CI_B"], + name="Bob", + size=11, + height=10, + id=84, + __labels=[1, 2], + ), ), ] assert len(VG.nodes) == 2 - assert sorted(VG.nodes, key=lambda x: x.name) == expected_nodes # type: ignore[attr-defined] + assert sorted(VG.nodes, key=lambda x: x.properties["name"]) == expected_nodes assert len(VG.relationships) == 2 vg_rels = sorted([(e.source, e.target, e.caption) for e in VG.relationships], key=lambda x: 
x[2] if x[2] else "foo") diff --git a/python-wrapper/tests/test_node.py b/python-wrapper/tests/test_node.py index faa32e5b..a15031f6 100644 --- a/python-wrapper/tests/test_node.py +++ b/python-wrapper/tests/test_node.py @@ -26,6 +26,7 @@ def test_nodes_with_all_options() -> None: "pinned": True, "x": 1, "y": 10, + "properties": {}, } @@ -36,6 +37,7 @@ def test_nodes_minimal_node() -> None: assert node.to_dict() == { "id": "1", + "properties": {}, } @@ -48,6 +50,7 @@ def test_node_with_float_size() -> None: assert node.to_dict() == { "id": "1", "size": 10.2, + "properties": {}, } @@ -60,6 +63,7 @@ def test_node_with_additional_fields() -> None: assert node.to_dict() == { "id": "1", "componentId": 2, + "properties": {}, } @@ -69,6 +73,7 @@ def test_id_aliases(alias: str) -> None: assert node.to_dict() == { "id": "1", + "properties": {}, } diff --git a/python-wrapper/tests/test_relationship.py b/python-wrapper/tests/test_relationship.py index 171c7af8..3b373443 100644 --- a/python-wrapper/tests/test_relationship.py +++ b/python-wrapper/tests/test_relationship.py @@ -23,6 +23,7 @@ def test_rels_with_all_options() -> None: "captionAlign": "top", "captionSize": 12, "color": "#ff0000", + "properties": {}, } @@ -34,7 +35,7 @@ def test_rels_minimal_rel() -> None: rel_dict = rel.to_dict() - assert {"id", "from", "to"} == set(rel_dict.keys()) + assert {"id", "from", "to", "properties"} == set(rel_dict.keys()) assert rel_dict["from"] == "1" assert rel_dict["to"] == "2" @@ -43,12 +44,12 @@ def test_rels_additional_fields() -> None: rel = Relationship( source="1", target="2", - componentId=2, + properties=dict(componentId=2), ) rel_dict = rel.to_dict() - assert {"id", "from", "to", "componentId"} == set(rel_dict.keys()) - assert rel.componentId == 2 # type: ignore[attr-defined] + assert {"id", "from", "to", "properties"} == set(rel_dict.keys()) + assert rel.properties["componentId"] == 2 @pytest.mark.parametrize("src_alias", ["source", "sourceNodeId", "source_node_id", "from"]) 
@@ -63,6 +64,6 @@ def test_aliases(src_alias: str, trg_alias: str) -> None: rel_dict = rel.to_dict() - assert {"id", "from", "to"} == set(rel_dict.keys()) + assert {"id", "from", "to", "properties"} == set(rel_dict.keys()) assert rel_dict["from"] == "1" assert rel_dict["to"] == "2" From 9cc8778009a089f974a54da85a8b483abd0e8a45 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Wed, 16 Apr 2025 11:52:44 +0200 Subject: [PATCH 02/10] Updating `from_dfs` to handle properties dict --- python-wrapper/src/neo4j_viz/pandas.py | 28 +++++++++++++++++++++----- python-wrapper/tests/test_pandas.py | 14 ++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/pandas.py b/python-wrapper/src/neo4j_viz/pandas.py index 9197b2c5..fda17f22 100644 --- a/python-wrapper/src/neo4j_viz/pandas.py +++ b/python-wrapper/src/neo4j_viz/pandas.py @@ -16,7 +16,11 @@ def from_dfs( node_dfs: DFS_TYPE, rel_dfs: DFS_TYPE, node_radius_min_max: Optional[tuple[float, float]] = (3, 60) ) -> VisualizationGraph: """ - Create a VisualizationGraph from two pandas DataFrames. + Create a VisualizationGraph from pandas DataFrames representing a graph. + + All columns will be included in the visualization graph. + If the columns are named as the fields of the `Node` or `Relationship` classes, they will be included as + top level fields of the respective objects. Otherwise, they will be included in the `properties` dictionary. 
Parameters ---------- @@ -38,8 +42,15 @@ def from_dfs( for node_df in node_dfs_iter: has_size &= "size" in node_df.columns for _, row in node_df.iterrows(): - node = Node(**row.to_dict()) - nodes.append(node) + top_level = {} + properties = {} + for key, value in row.to_dict().items(): + if key in Node.model_fields.keys(): + top_level[key] = value + else: + properties[key] = value + + nodes.append(Node(**top_level, properties=properties)) if isinstance(rel_dfs, DataFrame): rel_dfs_iter: Iterable[DataFrame] = [rel_dfs] @@ -49,8 +60,15 @@ def from_dfs( relationships = [] for rel_df in rel_dfs_iter: for _, row in rel_df.iterrows(): - rel = Relationship(**row.to_dict()) - relationships.append(rel) + top_level = {} + properties = {} + for key, value in row.to_dict().items(): + if key in Relationship.model_fields.keys(): + top_level[key] = value + else: + properties[key] = value + + relationships.append(Relationship(**top_level, properties=properties)) VG = VisualizationGraph(nodes=nodes, relationships=relationships) diff --git a/python-wrapper/tests/test_pandas.py b/python-wrapper/tests/test_pandas.py index d901b65e..44329261 100644 --- a/python-wrapper/tests/test_pandas.py +++ b/python-wrapper/tests/test_pandas.py @@ -6,18 +6,14 @@ def test_from_df() -> None: nodes = DataFrame( - { - "id": [0, 1], - "caption": ["A", "B"], - "size": [1337, 42], - "color": "#FF0000", - } + {"id": [0, 1], "caption": ["A", "B"], "size": [1337, 42], "color": "#FF0000", "instrument": ["piano", "guitar"]} ) relationships = DataFrame( { "source": [0, 1], "target": [1, 0], "caption": ["REL", "REL2"], + "weight": [1.0, 2.0], } ) VG = from_dfs(nodes, relationships, node_radius_min_max=(42, 1337)) @@ -28,21 +24,25 @@ def test_from_df() -> None: assert VG.nodes[0].caption == "A" assert VG.nodes[0].size == 1337 assert VG.nodes[0].color == Color("#ff0000") + assert VG.nodes[0].properties == {"instrument": "piano"} assert VG.nodes[1].id == 1 assert VG.nodes[1].caption == "B" assert VG.nodes[1].size == 
42 - assert VG.nodes[0].color == Color("#ff0000") + assert VG.nodes[1].color == Color("#ff0000") + assert VG.nodes[1].properties == {"instrument": "guitar"} assert len(VG.relationships) == 2 assert VG.relationships[0].source == 0 assert VG.relationships[0].target == 1 assert VG.relationships[0].caption == "REL" + assert VG.relationships[0].properties == {"weight": 1.0} assert VG.relationships[1].source == 1 assert VG.relationships[1].target == 0 assert VG.relationships[1].caption == "REL2" + assert VG.relationships[1].properties == {"weight": 2.0} def test_from_dfs() -> None: From 992276b385cd621e0c5adf16f3c661f72ddbcb01 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Wed, 16 Apr 2025 12:30:28 +0200 Subject: [PATCH 03/10] Updating `from_gds` to handle properties dict --- python-wrapper/src/neo4j_viz/gds.py | 10 +++++++++- python-wrapper/src/neo4j_viz/pandas.py | 11 ++++++++++- python-wrapper/tests/test_gds.py | 27 +++++++++++++++++--------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index e16baf92..60123e15 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -35,6 +35,10 @@ def from_gds( """ Create a VisualizationGraph from a GraphDataScience object and a Graph object. + All `additional_node_properties` will be included in the visualization graph. + If the properties are named as the fields of the `Node` class, they will be included as top level fields of the + created `Node` objects. Otherwise, they will be included in the `properties` dictionary. 
+ Parameters ---------- gds : GraphDataScience @@ -75,9 +79,13 @@ def from_gds( node_props_df = pd.concat(node_dfs.values(), ignore_index=True, axis=0).drop_duplicates() if size_property is not None: + if "size" in actual_node_properties and size_property != "size": + node_props_df.rename(columns={"size": "__size"}, inplace=True) node_props_df.rename(columns={size_property: "size"}, inplace=True) for lbl, df in node_dfs.items(): + if "labels" in actual_node_properties: + df.rename(columns={"labels": "__labels"}, inplace=True) df["labels"] = lbl node_lbls_df = pd.concat([df[["id", "labels"]] for df in node_dfs.values()], ignore_index=True, axis=0) @@ -88,4 +96,4 @@ def from_gds( rel_df = _rel_df(gds, G) rel_df.rename(columns={"sourceNodeId": "source", "targetNodeId": "target"}, inplace=True) - return from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max) + return from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}) diff --git a/python-wrapper/src/neo4j_viz/pandas.py b/python-wrapper/src/neo4j_viz/pandas.py index fda17f22..d01ec1ac 100644 --- a/python-wrapper/src/neo4j_viz/pandas.py +++ b/python-wrapper/src/neo4j_viz/pandas.py @@ -13,7 +13,10 @@ def from_dfs( - node_dfs: DFS_TYPE, rel_dfs: DFS_TYPE, node_radius_min_max: Optional[tuple[float, float]] = (3, 60) + node_dfs: DFS_TYPE, + rel_dfs: DFS_TYPE, + node_radius_min_max: Optional[tuple[float, float]] = (3, 60), + rename_properties: Optional[dict[str, str]] = None, ) -> VisualizationGraph: """ Create a VisualizationGraph from pandas DataFrames representing a graph. @@ -31,6 +34,8 @@ def from_dfs( node_radius_min_max : tuple[float, float], optional Minimum and maximum node radius. To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range. + rename_properties : dict[str, str], optional + An optional map for renaming certain column names when they are converted to properties. 
""" if isinstance(node_dfs, DataFrame): node_dfs_iter: Iterable[DataFrame] = [node_dfs] @@ -48,6 +53,8 @@ def from_dfs( if key in Node.model_fields.keys(): top_level[key] = value else: + if rename_properties and key in rename_properties: + key = rename_properties[key] properties[key] = value nodes.append(Node(**top_level, properties=properties)) @@ -66,6 +73,8 @@ def from_dfs( if key in Relationship.model_fields.keys(): top_level[key] = value else: + if rename_properties and key in rename_properties: + key = rename_properties[key] properties[key] = value relationships.append(Relationship(**top_level, properties=properties)) diff --git a/python-wrapper/tests/test_gds.py b/python-wrapper/tests/test_gds.py index c6610a4a..bad5e4e1 100644 --- a/python-wrapper/tests/test_gds.py +++ b/python-wrapper/tests/test_gds.py @@ -17,6 +17,7 @@ def test_from_gds_integration(gds: Any) -> None: "labels": [["A"], ["C"], ["A", "B"]], "score": [1337, 42, 3.14], "component": [1, 4, 2], + "size": [0.1, 0.2, 0.3], } ) rels = pd.DataFrame( @@ -29,18 +30,24 @@ def test_from_gds_integration(gds: Any) -> None: with gds.graph.construct("flo", nodes, rels) as G: VG = from_gds( - gds, G, size_property="score", additional_node_properties=["component"], node_radius_min_max=(3.14, 1337) + gds, + G, + size_property="score", + additional_node_properties=["component", "size"], + node_radius_min_max=(3.14, 1337), ) assert len(VG.nodes) == 3 assert sorted(VG.nodes, key=lambda x: x.id) == [ - Node(id=0, labels=["A"], size=float(1337), component=float(1)), - Node(id=1, labels=["C"], size=float(42), component=float(4)), - Node(id=2, labels=["A", "B"], size=float(3.14), component=float(2)), + Node(id=0, size=float(1337), properties=dict(labels=["A"], component=float(1), size=0.1)), + Node(id=1, size=float(42), properties=dict(labels=["C"], component=float(4), size=0.2)), + Node(id=2, size=float(3.14), properties=dict(labels=["A", "B"], component=float(2), size=0.3)), ] assert len(VG.relationships) == 3 - 
vg_rels = sorted([(e.source, e.target, e.relationshipType) for e in VG.relationships], key=lambda x: x[0]) # type: ignore[attr-defined] + vg_rels = sorted( + [(e.source, e.target, e.properties["relationshipType"]) for e in VG.relationships], key=lambda x: x[0] + ) assert vg_rels == [ (0, 1, "REL"), (1, 2, "REL2"), @@ -111,13 +118,15 @@ def test_from_gds_mocked(mocker: MockerFixture) -> None: assert len(VG.nodes) == 3 assert sorted(VG.nodes, key=lambda x: x.id) == [ - Node(id=0, labels=["A"], size=float(1337), component=float(1)), - Node(id=1, labels=["C"], size=float(42), component=float(4)), - Node(id=2, labels=["A", "B"], size=float(3.14), component=float(2)), + Node(id=0, size=float(1337), properties=dict(labels=["A"], component=float(1))), + Node(id=1, size=float(42), properties=dict(labels=["C"], component=float(4))), + Node(id=2, size=float(3.14), properties=dict(labels=["A", "B"], component=float(2))), ] assert len(VG.relationships) == 3 - vg_rels = sorted([(e.source, e.target, e.relationshipType) for e in VG.relationships], key=lambda x: x[0]) # type: ignore[attr-defined] + vg_rels = sorted( + [(e.source, e.target, e.properties["relationshipType"]) for e in VG.relationships], key=lambda x: x[0] + ) assert vg_rels == [ (0, 1, "REL"), (1, 2, "REL2"), From 2db5612d28e3bc873f807582e3d24fea00d1a823 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Wed, 16 Apr 2025 15:12:43 +0200 Subject: [PATCH 04/10] Update color_nodes signature --- python-wrapper/src/neo4j_viz/colors.py | 2 +- .../src/neo4j_viz/visualization_graph.py | 76 ++++++++----- python-wrapper/tests/test_colors.py | 102 +++++++++++------- 3 files changed, 115 insertions(+), 65 deletions(-) diff --git a/python-wrapper/src/neo4j_viz/colors.py b/python-wrapper/src/neo4j_viz/colors.py index bbea387e..d28814ee 100644 --- a/python-wrapper/src/neo4j_viz/colors.py +++ b/python-wrapper/src/neo4j_viz/colors.py @@ -7,7 +7,7 @@ ColorsType = Union[dict[Any, ColorType], Iterable[ColorType]] -class 
PropertyType(Enum): +class ColorSpace(Enum): DISCRETE = "discrete" CONTINUOUS = "continuous" diff --git a/python-wrapper/src/neo4j_viz/visualization_graph.py b/python-wrapper/src/neo4j_viz/visualization_graph.py index a8fcefad..f3b67c5d 100644 --- a/python-wrapper/src/neo4j_viz/visualization_graph.py +++ b/python-wrapper/src/neo4j_viz/visualization_graph.py @@ -2,12 +2,12 @@ import warnings from collections.abc import Iterable -from typing import Any, Hashable, Optional +from typing import Any, Callable, Hashable, Optional from IPython.display import HTML from pydantic_extra_types.color import Color, ColorType -from .colors import NEO4J_COLORS_CONTINUOUS, NEO4J_COLORS_DISCRETE, ColorsType, PropertyType +from .colors import NEO4J_COLORS_CONTINUOUS, NEO4J_COLORS_DISCRETE, ColorSpace, ColorsType from .node import Node, NodeIdType from .node_size import RealNumber, verify_radii from .nvl import NVL @@ -204,42 +204,64 @@ def _normalize_values( def color_nodes( self, - property: str, + field: Optional[str] = None, + property: Optional[str] = None, colors: Optional[ColorsType] = None, - property_type: PropertyType = PropertyType.DISCRETE, + color_space: ColorSpace = ColorSpace.DISCRETE, override: bool = False, ) -> None: """ - Color the nodes in the graph based on a property. + Color the nodes in the graph based on either a node field, or a node property. - It's possible to color the nodes based on a discrete or continuous property. In the discrete case, a new color - from the `colors` provided is assigned to each unique value of the node property. + It's possible to color the nodes based on a discrete or continuous color space. In the discrete case, a new + color from the `colors` provided is assigned to each unique value of the node field/property. In the continuous case, the `colors` should be a list of colors representing a range that are used to - create a gradient of colors based on the values of the node property. 
+ create a gradient of colors based on the values of the node field/property. Parameters ---------- + field: + The field of the nodes to base the coloring on. The type of this field must be hashable, or be a + list, set or dict containing only hashable types. Must be None if `property` is provided. property: The property of the nodes to base the coloring on. The type of this property must be hashable, or be a - list, set or dict containing only hashable types. + list, set or dict containing only hashable types. Must be None if `field` is provided. colors: The colors to use for the nodes. - If `property_type` is `PropertyType.DISCRETE`, the colors can be a dictionary mapping from property value + If `color_space` is `ColorSpace.DISCRETE`, the colors can be a dictionary mapping from field/property value to color, or an iterable of colors in which case the colors are used in order. - If `property_type` is `PropertyType.CONTINUOUS`, the colors must be a list of colors representing a range. + If `color_space` is `ColorSpace.CONTINUOUS`, the colors must be a list of colors representing a range. Allowed color values are for example “#FF0000”, “red” or (255, 0, 0) (full list: https://docs.pydantic.dev/2.0/usage/types/extra_types/color_types/). The default colors are the Neo4j graph colors. - property_type: - The type of the property, either `PropertyType.DISCRETE` or `PropertyType.CONTINUOUS`. It determines whether - colors are assigned based on unique property values or a gradient of the values of the property. + color_space: + The type of space of the provided `colors`. Either `ColorSpace.DISCRETE` or `ColorSpace.CONTINUOUS`. It determines whether + colors are assigned based on unique field/property values or a gradient of the values of the field/property. override: Whether to override existing colors of the nodes, if they have any. 
""" - if property_type == PropertyType.DISCRETE: + if not ((field is None) ^ (property is None)): + raise ValueError( + f"Exactly one of the arguments `field` (received '{field}') and `property` (received '{property}') must be provided" + ) + + if field is None: + assert property is not None + attribute = property + + def node_to_attr(node: Node) -> Any: + return node.properties.get(attribute) + else: + assert field is not None + attribute = field + + def node_to_attr(node: Node) -> Any: + return getattr(node, attribute) + + if color_space == ColorSpace.DISCRETE: if colors is None: colors = NEO4J_COLORS_DISCRETE else: - node_map = {node.id: getattr(node, property) for node in self.nodes if getattr(node, property) is not None} + node_map = {node.id: node_to_attr(node) for node in self.nodes if node_to_attr(node) is not None} normalized_map = self._normalize_values(node_map) if colors is None: @@ -250,19 +272,21 @@ def color_nodes( num_colors = len(colors) colors = { - getattr(node, property): colors[round(normalized_map[node.id] * (num_colors - 1))] + node_to_attr(node): colors[round(normalized_map[node.id] * (num_colors - 1))] for node in self.nodes - if getattr(node, property) is not None + if node_to_attr(node) is not None } if isinstance(colors, dict): - self._color_nodes_dict(property, colors, override) + self._color_nodes_dict(colors, override, node_to_attr) else: - self._color_nodes_iter(property, colors, override) + self._color_nodes_iter(attribute, colors, override, node_to_attr) - def _color_nodes_dict(self, property: str, colors: dict[str, ColorType], override: bool) -> None: + def _color_nodes_dict( + self, colors: dict[str, ColorType], override: bool, node_to_attr: Callable[[Node], Any] + ) -> None: for node in self.nodes: - color = colors.get(getattr(node, property)) + color = colors.get(node_to_attr(node)) if color is None: continue @@ -275,12 +299,14 @@ def _color_nodes_dict(self, property: str, colors: dict[str, ColorType], overrid else: node.color 
= color - def _color_nodes_iter(self, property: str, colors: Iterable[ColorType], override: bool) -> None: + def _color_nodes_iter( + self, attribute: str, colors: Iterable[ColorType], override: bool, node_to_attr: Callable[[Node], Any] + ) -> None: exhausted_colors = False prop_to_color = {} colors_iter = iter(colors) for node in self.nodes: - raw_prop = getattr(node, property) + raw_prop = node_to_attr(node) try: prop = self._make_hashable(raw_prop) except ValueError: @@ -306,7 +332,7 @@ def _color_nodes_iter(self, property: str, colors: Iterable[ColorType], override if exhausted_colors: warnings.warn( - f"Ran out of colors for property '{property}'. {len(prop_to_color)} colors were needed, but only " + f"Ran out of colors for property '{attribute}'. {len(prop_to_color)} colors were needed, but only " f"{len(set(prop_to_color.values()))} were given, so reused colors" ) diff --git a/python-wrapper/tests/test_colors.py b/python-wrapper/tests/test_colors.py index a32546a9..abe4c224 100644 --- a/python-wrapper/tests/test_colors.py +++ b/python-wrapper/tests/test_colors.py @@ -2,7 +2,7 @@ from pydantic_extra_types.color import Color from neo4j_viz import Node, VisualizationGraph -from neo4j_viz.colors import NEO4J_COLORS_CONTINUOUS, NEO4J_COLORS_DISCRETE, PropertyType +from neo4j_viz.colors import NEO4J_COLORS_CONTINUOUS, NEO4J_COLORS_DISCRETE, ColorSpace @pytest.mark.parametrize("override", [True, False]) @@ -15,7 +15,7 @@ def test_color_nodes_dict(override: bool) -> None: VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("caption", {"Person": "#000000", "Product": "#00FF00"}, override=override) + VG.color_nodes(field="caption", colors={"Person": "#000000", "Product": "#00FF00"}, override=override) assert VG.nodes[0].color == Color("#000000") assert VG.nodes[1].color == Color("#00ff00") @@ -35,7 +35,7 @@ def test_color_nodes_iter_basic(override: bool) -> None: VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("caption", 
["#000000", "#00FF00"], override=override) + VG.color_nodes(field="caption", colors=["#000000", "#00FF00"], override=override) assert VG.nodes[0].color == Color("#000000") assert VG.nodes[1].color == Color("#00ff00") @@ -60,7 +60,7 @@ def test_color_nodes_iter_exhausted() -> None: "Ran out of colors for property 'caption'. 3 colors were needed, but only 2 were given, so reused colors" ), ): - VG.color_nodes("caption", ["#000000", "#00FF00"]) + VG.color_nodes(field="caption", colors=["#000000", "#00FF00"]) assert VG.nodes[0].color == Color("#000000") assert VG.nodes[1].color == Color("#00ff00") @@ -80,7 +80,7 @@ def test_color_nodes_palette() -> None: ] VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("caption", Moonrise1_5.colors) + VG.color_nodes(field="caption", colors=Moonrise1_5.colors) assert VG.nodes[0].color == Color((114, 202, 221)) assert VG.nodes[1].color == Color((240, 165, 176)) @@ -107,13 +107,13 @@ def test_color_nodes_default() -> None: def test_color_nodes_continuous_default() -> None: nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", rank=10), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", rank=20), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", rank=30), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", properties={"rank": 10}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", properties={"rank": 20}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", properties={"rank": 30}), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("rank", property_type=PropertyType.CONTINUOUS) + VG.color_nodes(property="rank", color_space=ColorSpace.CONTINUOUS) assert VG.nodes[0].color == Color(NEO4J_COLORS_CONTINUOUS[0]) assert VG.nodes[1].color == Color(NEO4J_COLORS_CONTINUOUS[128]) @@ -122,14 +122,14 @@ def test_color_nodes_continuous_default() -> None: def test_color_nodes_continuous_custom() -> None: nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", rank=10), - 
Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", rank=18), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", rank=30), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", properties={"rank": 10}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", properties={"rank": 18}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", properties={"rank": 30}), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) colors = [(0, 0, 0), (85, 85, 85), (170, 170, 170), (255, 255, 255)] - VG.color_nodes("rank", colors=colors, property_type=PropertyType.CONTINUOUS) + VG.color_nodes(property="rank", colors=colors, color_space=ColorSpace.CONTINUOUS) assert VG.nodes[0].color == Color("black") assert VG.nodes[1].color == Color((85, 85, 85)) @@ -138,8 +138,8 @@ def test_color_nodes_continuous_custom() -> None: def test_color_nodes_continuous_forbidden() -> None: nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", rank=10), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", rank=30), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", properties={"rank": 10}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", properties={"rank": 30}), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) @@ -147,21 +147,31 @@ def test_color_nodes_continuous_forbidden() -> None: with pytest.raises( ValueError, match="For continuous properties, `colors` must be a list of colors representing a range" ): - VG.color_nodes("rank", {10: "#000000", 30: "#00FF00"}, property_type=PropertyType.CONTINUOUS) # type: ignore[arg-type] + VG.color_nodes(property="rank", colors={10: "#000000", 30: "#00FF00"}, color_space=ColorSpace.CONTINUOUS) # type: ignore[arg-type] def test_color_nodes_lists() -> None: nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", labels=["Person"]), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="Product", labels=["Product"]), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", caption="Product", 
labels=["Product"]), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:1", caption="Both", labels=["Person", "Product"]), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:2", caption="Both again", labels=["Person", "Product"]), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:3", caption="Both reorder", labels=["Product", "Person"]), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", properties={"labels": ["Person"]}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="Product", properties={"labels": ["Product"]}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", caption="Product", properties={"labels": ["Product"]}), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:1", caption="Both", properties={"labels": ["Person", "Product"]} + ), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:2", + caption="Both again", + properties={"labels": ["Person", "Product"]}, + ), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:3", + caption="Both reorder", + properties={"labels": ["Product", "Person"]}, + ), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("labels", ["#000000", "#00FF00", "#FF0000", "#0000FF"]) + VG.color_nodes(property="labels", colors=["#000000", "#00FF00", "#FF0000", "#0000FF"]) assert VG.nodes[0].color == Color("#000000") assert VG.nodes[1].color == Color("#00ff00") @@ -173,17 +183,27 @@ def test_color_nodes_lists() -> None: def test_color_nodes_sets() -> None: nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", labels={"Person"}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="Product", labels={"Product"}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", caption="Product", labels={"Product"}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:1", caption="Both", labels={"Person", "Product"}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:2", caption="Both again", labels={"Person", "Product"}), - 
Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:3", caption="Both reorder", labels={"Product", "Person"}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", properties={"labels": {"Person"}}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="Product", properties={"labels": {"Product"}}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", caption="Product", properties={"labels": {"Product"}}), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:1", caption="Both", properties={"labels": {"Person", "Product"}} + ), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:2", + caption="Both again", + properties={"labels": {"Person", "Product"}}, + ), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:3", + caption="Both reorder", + properties={"labels": {"Product", "Person"}}, + ), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("labels", ["#000000", "#00FF00", "#FF0000", "#0000FF"]) + VG.color_nodes(property="labels", colors=["#000000", "#00FF00", "#FF0000", "#0000FF"]) assert VG.nodes[0].color == Color("#000000") assert VG.nodes[1].color == Color("#00ff00") @@ -195,15 +215,15 @@ def test_color_nodes_sets() -> None: def test_color_nodes_dicts() -> None: nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", config={"age": 18}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="Product", config={"price": 100}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", caption="Product", config={"price": 100}), - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:1", caption="Product", config={"price": 1}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", properties={"config": {"age": 18}}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="Product", properties={"config": {"price": 100}}), + Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:11", caption="Product", properties={"config": {"price": 100}}), + 
Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:1", caption="Product", properties={"config": {"price": 1}}), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("config", ["#000000", "#00FF00", "#FF0000", "#0000FF"]) + VG.color_nodes(property="config", colors=["#000000", "#00FF00", "#FF0000", "#0000FF"]) assert VG.nodes[0].color == Color("#000000") assert VG.nodes[1].color == Color("#00ff00") @@ -216,17 +236,21 @@ def test_color_nodes_unhashable() -> None: Node( id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", - config={"movies": ["Star Wars", "Star Trek"]}, + properties={"config": {"movies": ["Star Wars", "Star Trek"]}}, ), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) with pytest.raises(ValueError, match="Unable to color nodes by unhashable property type ''"): - VG.color_nodes("config", ["#000000"]) + VG.color_nodes(property="config", colors=["#000000"]) nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", list_of_lists=[[1, 2], [3, 4]]), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", + caption="Person", + properties={"list_of_lists": [[1, 2], [3, 4]]}, + ), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) with pytest.raises(ValueError, match="Unable to color nodes by unhashable property type ''"): - VG.color_nodes("list_of_lists", ["#000000"]) + VG.color_nodes(property="list_of_lists", colors=["#000000"]) From 92256a6f19fc5e42fe24f457fb9b3d5a79c00cbf Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Wed, 16 Apr 2025 15:16:39 +0200 Subject: [PATCH 05/10] Update render test --- python-wrapper/tests/test_render.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python-wrapper/tests/test_render.py b/python-wrapper/tests/test_render.py index 092ea28a..f505dc14 100644 --- a/python-wrapper/tests/test_render.py +++ b/python-wrapper/tests/test_render.py @@ -69,7 +69,9 @@ def test_unsupported_field_type() -> None: ValueError, match="A field of a 
node object is not supported: Object of type set is not JSON serializable" ): nodes = [ - Node(id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", unsupported=set([1, 2, 3])), + Node( + id="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", caption="Person", properties={"unsupported": {1, 2, 3}} + ), ] VG = VisualizationGraph(nodes=nodes, relationships=[]) VG.render() @@ -83,7 +85,7 @@ def test_unsupported_field_type() -> None: source="4:d09f48a4-5fca-421d-921d-a30a896c604d:0", target="4:d09f48a4-5fca-421d-921d-a30a896c604d:6", caption="BUYS", - unsupported=set([1, 2, 3]), + properties={"unsupported": {1, 2, 3}}, ), ] VG = VisualizationGraph(nodes=[], relationships=relationships) From a8773ac3bf5fa09496cd32ab75cb95729ed80f32 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Wed, 16 Apr 2025 16:03:36 +0200 Subject: [PATCH 06/10] Start updating docs for new `properties` map --- docs/source/integration.rst | 36 ++++++++++++------- python-wrapper/src/neo4j_viz/gds.py | 1 + python-wrapper/src/neo4j_viz/neo4j.py | 1 + python-wrapper/src/neo4j_viz/node.py | 7 ++-- python-wrapper/src/neo4j_viz/relationship.py | 7 ++-- .../src/neo4j_viz/visualization_graph.py | 1 + 6 files changed, 35 insertions(+), 18 deletions(-) diff --git a/docs/source/integration.rst b/docs/source/integration.rst index 1f451ec7..57096bb1 100644 --- a/docs/source/integration.rst +++ b/docs/source/integration.rst @@ -3,8 +3,9 @@ Integration with other libraries In addition to creating graphs from scratch, with ``neo4j-viz`` as is shown in the :doc:`Getting started section <./getting-started>`, you can also import data directly from external sources. -In this section we will cover how to import data from `Pandas DataFrames `_ and -`Neo4j Graph Data Science `_. +In this section we will cover how to import data from `Pandas DataFrames `_, +`Neo4j Graph Data Science `_ and +`Neo4j Database `_. .. 
contents:: On this page: @@ -31,12 +32,18 @@ The ``from_dfs`` method takes two mandatory positional parameters: * A Pandas ``DataFrame``, or iterable (eg. list) of DataFrames representing the nodes of the graph. The rows of the DataFrame(s) should represent the individual nodes, and the columns should represent the node - IDs and properties. The columns map directly to fields of :doc:`Node <./api-reference/node>`, and as such - should follow the same naming conventions. + IDs and attributes. + If a column shares the name with a field of :doc:`Node <./api-reference/node>`, the values it contains will be set + on corresponding nodes under that field name. + Otherwise, the column name will be a key in each node's ``properties`` dictionary, that maps to the node's corresponding + value in the column. * A Pandas ``DataFrame``, or iterable (eg. list) of DataFrames representing the relationships of the graph. The rows of the DataFrame(s) should represent the individual relationships, and the columns should represent the - relationship IDs and properties. The columns map directly to fields of - :doc:`Relationship <./api-reference/relationship>`, and as such should follow the same naming conventions. + relationship IDs and attributes. + If a column shares the name with a field of :doc:`Relationship <./api-reference/relationship>`, the values it contains + will be set on corresponding relationships under that field name. + Otherwise, the column name will be a key in each relationship's ``properties`` dictionary, that maps to the relationship's corresponding + value in the column. ``from_dfs`` also takes an optional property, ``node_radius_min_max``, that can be used (and is used by default) to scale the node sizes for the visualization. @@ -47,6 +54,10 @@ will have the size of the second value. The other nodes will be scaled linearly between these two values according to their relative size. This can be useful if node sizes vary a lot, or are all very small or very big.
+There is also an option to rename columns that will map to key pairs in `Node.properties`. +This can be done by providing a map from column names to the desired property names as the optional `rename_properties` +argument. + Example ~~~~~~~ @@ -97,11 +108,12 @@ The ``from_gds`` method takes two mandatory positional parameters: * A ``Graph`` representing the projection that one wants to import. We can also provide an optional ``size_property`` parameter, which should refer to a node property of the projection, -and will be used to determine the size of the nodes in the visualization. +and will be used to determine the sizes of the nodes in the visualization. The ``additional_node_properties`` parameter is also optional, and should be a list of additional node properties of the projection that you want to include in the visualization. -For example, these properties could be used to color the nodes, or give captions to them in the visualization. +For example, these properties could be used to color the nodes, or give captions to them in the visualization, or simply +included in the nodes' `Node.properties` maps without directly impacting the visualization. The last optional property, ``node_radius_min_max``, can be used (and is used by default) to scale the node sizes for the visualization. @@ -143,7 +155,7 @@ We use the "pagerank" property to determine the size of the nodes, and the "comp # Color the nodes by the `componentId` property, so that the nodes are # colored by the connected component they belong to - VG.color_nodes("componentId") + VG.color_nodes(property="componentId") Please see the :doc:`Visualizing Neo4j Graph Data Science (GDS) Graphs tutorial <./tutorials/gds-example>` for a @@ -167,10 +179,10 @@ The ``from_neo4j`` method takes one mandatory positional parameters: * A ``result`` representing the query result either in form of `neo4j.graph.Graph` or `neo4j.Result`. 
-The ``node_caption`` parameter is also optional, and indicates the value to use for the caption of each node in the visualization. +The ``node_caption`` parameter is also optional, and indicates the node property to use for the caption of each node in the visualization. -We can also provide an optional ``size_property`` parameter, which should refer to a node property of the projection, -and will be used to determine the size of the nodes in the visualization. +We can also provide an optional ``size_property`` parameter, which should refer to a node property, +and will be used to determine the sizes of the nodes in the visualization. The last optional property, ``node_radius_min_max``, can be used (and is used by default) to scale the node sizes for the visualization. diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index 60123e15..08462cac 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -38,6 +38,7 @@ def from_gds( All `additional_node_properties` will be included in the visualization graph. If the properties are named as the fields of the `Node` class, they will be included as top level fields of the created `Node` objects. Otherwise, they will be included in the `properties` dictionary. + Additionally, a new "labels" node property will be added. Parameters ---------- diff --git a/python-wrapper/src/neo4j_viz/neo4j.py b/python-wrapper/src/neo4j_viz/neo4j.py index 5092a995..1ab21955 100644 --- a/python-wrapper/src/neo4j_viz/neo4j.py +++ b/python-wrapper/src/neo4j_viz/neo4j.py @@ -23,6 +23,7 @@ def from_neo4j( All node and relationship properties will be included in the visualization graph. If the properties are named as the fields of the `Node` or `Relationship` classes, they will be included as top level fields of the respective objects. Otherwise, they will be included in the `properties` dictionary. 
+ Additionally, a "labels" property will be added for nodes and a "type" property for relationships. Parameters ---------- diff --git a/python-wrapper/src/neo4j_viz/node.py b/python-wrapper/src/neo4j_viz/node.py index 52eced11..9905af1a 100644 --- a/python-wrapper/src/neo4j_viz/node.py +++ b/python-wrapper/src/neo4j_viz/node.py @@ -14,7 +14,8 @@ class Node(BaseModel, extra="allow"): """ A node in a graph to visualize. - All options available in the NVL library (see https://neo4j.com/docs/nvl/current/base-library/#_nodes) + + For more info on each field, see the NVL library docs: https://neo4j.com/docs/nvl/current/base-library/#_nodes """ #: Unique identifier for the node @@ -45,8 +46,8 @@ class Node(BaseModel, extra="allow"): x: Optional[RealNumber] = Field(None, description="The x-coordinate of the node") #: The y-coordinate of the node y: Optional[RealNumber] = Field(None, description="The y-coordinate of the node") - #: The properties of the node - properties: dict[str, Any] = Field(default_factory=dict, description="The properties of the node") + #: Additional properties of the node that do not directly impact the visualization + properties: dict[str, Any] = Field(default_factory=dict, description="Additional properties of the node that do not directly impact the visualization") @field_serializer("color") def serialize_color(self, color: Color) -> str: diff --git a/python-wrapper/src/neo4j_viz/relationship.py b/python-wrapper/src/neo4j_viz/relationship.py index 10e938ec..6345262d 100644 --- a/python-wrapper/src/neo4j_viz/relationship.py +++ b/python-wrapper/src/neo4j_viz/relationship.py @@ -12,7 +12,8 @@ class Relationship(BaseModel, extra="allow"): """ A relationship in a graph to visualize. 
- All options available in the NVL library (see https://neo4j.com/docs/nvl/current/base-library/#_relationships) + + For more info on each field, see the NVL library docs: https://neo4j.com/docs/nvl/current/base-library/#_relationships """ #: Unique identifier for the relationship @@ -43,8 +44,8 @@ class Relationship(BaseModel, extra="allow"): ) #: The color of the relationship. Allowed input is for example "#FF0000", "red" or (255, 0, 0) color: Optional[ColorType] = Field(None, description="The color of the relationship") - #: The properties of the relationship - properties: dict[str, Any] = Field(default_factory=dict, description="The properties of the relationship") + #: Additional properties of the relationship that do not directly impact the visualization + properties: dict[str, Any] = Field(default_factory=dict, description="Additional properties of the relationship that do not directly impact the visualization") @field_serializer("color") def serialize_color(self, color: Color) -> str: diff --git a/python-wrapper/src/neo4j_viz/visualization_graph.py b/python-wrapper/src/neo4j_viz/visualization_graph.py index f3b67c5d..1e27e088 100644 --- a/python-wrapper/src/neo4j_viz/visualization_graph.py +++ b/python-wrapper/src/neo4j_viz/visualization_graph.py @@ -204,6 +204,7 @@ def _normalize_values( def color_nodes( self, + *, field: Optional[str] = None, property: Optional[str] = None, colors: Optional[ColorsType] = None, From f8b121c40d8a42947cee83cac20fc7d79b4163ee Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 17 Apr 2025 09:34:29 +0200 Subject: [PATCH 07/10] Simplify `from_dfs` surface --- docs/source/integration.rst | 4 -- python-wrapper/src/neo4j_viz/gds.py | 4 +- python-wrapper/src/neo4j_viz/node.py | 5 ++- python-wrapper/src/neo4j_viz/pandas.py | 47 +++++++++++--------- python-wrapper/src/neo4j_viz/relationship.py | 5 ++- 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/docs/source/integration.rst b/docs/source/integration.rst 
index 57096bb1..31687303 100644 --- a/docs/source/integration.rst +++ b/docs/source/integration.rst @@ -54,10 +54,6 @@ will have the size of the second value. The other nodes will be scaled linearly between these two values according to their relative size. This can be useful if node sizes vary a lot, or are all very small or very big. -There is also an option to rename columns that will map to key pairs in `Node.properties`. -This can be done by providing a map from column names to the desired property names as the optional `rename_properties` -argument. - Example ~~~~~~~ diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index 08462cac..75c2052c 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -6,7 +6,7 @@ import pandas as pd from graphdatascience import Graph, GraphDataScience -from .pandas import from_dfs +from .pandas import _from_dfs from .visualization_graph import VisualizationGraph @@ -97,4 +97,4 @@ def from_gds( rel_df = _rel_df(gds, G) rel_df.rename(columns={"sourceNodeId": "source", "targetNodeId": "target"}, inplace=True) - return from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}) + return _from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}) diff --git a/python-wrapper/src/neo4j_viz/node.py b/python-wrapper/src/neo4j_viz/node.py index 9905af1a..f8e15c98 100644 --- a/python-wrapper/src/neo4j_viz/node.py +++ b/python-wrapper/src/neo4j_viz/node.py @@ -47,7 +47,10 @@ class Node(BaseModel, extra="allow"): #: The y-coordinate of the node y: Optional[RealNumber] = Field(None, description="The y-coordinate of the node") #: Additional properties of the node that do not directly impact the visualization - properties: dict[str, Any] = Field(default_factory=dict, description="Additional properties of the node that do not directly impact the visualization") + properties: dict[str, 
Any] = Field( + default_factory=dict, + description="Additional properties of the node that do not directly impact the visualization", + ) @field_serializer("color") def serialize_color(self, color: Color) -> str: diff --git a/python-wrapper/src/neo4j_viz/pandas.py b/python-wrapper/src/neo4j_viz/pandas.py index d01ec1ac..5eb92752 100644 --- a/python-wrapper/src/neo4j_viz/pandas.py +++ b/python-wrapper/src/neo4j_viz/pandas.py @@ -12,31 +12,12 @@ DFS_TYPE = Union[DataFrame, Iterable[DataFrame]] -def from_dfs( +def _from_dfs( node_dfs: DFS_TYPE, rel_dfs: DFS_TYPE, node_radius_min_max: Optional[tuple[float, float]] = (3, 60), rename_properties: Optional[dict[str, str]] = None, ) -> VisualizationGraph: - """ - Create a VisualizationGraph from pandas DataFrames representing a graph. - - All columns will be included in the visualization graph. - If the columns are named as the fields of the `Node` or `Relationship` classes, they will be included as - top level fields of the respective objects. Otherwise, they will be included in the `properties` dictionary. - - Parameters - ---------- - node_dfs: Union[DataFrame, Iterable[DataFrame]] - DataFrame or iterable of DataFrames containing node data. - rel_dfs: Union[DataFrame, Iterable[DataFrame]] - DataFrame or iterable of DataFrames containing relationship data. - node_radius_min_max : tuple[float, float], optional - Minimum and maximum node radius. - To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range. - rename_properties : dict[str, str], optional - An optional map for renaming certain column names when they are converted to properties. 
- """ if isinstance(node_dfs, DataFrame): node_dfs_iter: Iterable[DataFrame] = [node_dfs] else: @@ -85,3 +66,29 @@ def from_dfs( VG.resize_nodes(node_radius_min_max=node_radius_min_max) return VG + + +def from_dfs( + node_dfs: DFS_TYPE, + rel_dfs: DFS_TYPE, + node_radius_min_max: Optional[tuple[float, float]] = (3, 60), +) -> VisualizationGraph: + """ + Create a VisualizationGraph from pandas DataFrames representing a graph. + + All columns will be included in the visualization graph. + If the columns are named as the fields of the `Node` or `Relationship` classes, they will be included as + top level fields of the respective objects. Otherwise, they will be included in the `properties` dictionary. + + Parameters + ---------- + node_dfs: Union[DataFrame, Iterable[DataFrame]] + DataFrame or iterable of DataFrames containing node data. + rel_dfs: Union[DataFrame, Iterable[DataFrame]] + DataFrame or iterable of DataFrames containing relationship data. + node_radius_min_max : tuple[float, float], optional + Minimum and maximum node radius. + To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range. + """ + + return _from_dfs(node_dfs, rel_dfs, node_radius_min_max) diff --git a/python-wrapper/src/neo4j_viz/relationship.py b/python-wrapper/src/neo4j_viz/relationship.py index 6345262d..f19d1b09 100644 --- a/python-wrapper/src/neo4j_viz/relationship.py +++ b/python-wrapper/src/neo4j_viz/relationship.py @@ -45,7 +45,10 @@ class Relationship(BaseModel, extra="allow"): #: The color of the relationship. 
Allowed input is for example "#FF0000", "red" or (255, 0, 0) color: Optional[ColorType] = Field(None, description="The color of the relationship") #: Additional properties of the relationship that do not directly impact the visualization - properties: dict[str, Any] = Field(default_factory=dict, description="Additional properties of the relationship that do not directly impact the visualization") + properties: dict[str, Any] = Field( + default_factory=dict, + description="Additional properties of the relationship that do not directly impact the visualization", + ) @field_serializer("color") def serialize_color(self, color: Color) -> str: From c7654c9f374c2abc31350a8184c4a8238a2e650b Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 17 Apr 2025 09:50:37 +0200 Subject: [PATCH 08/10] Finish updating docs --- docs/source/customizing.rst | 40 ++++++++++--------- examples/streamlit-example.py | 2 +- python-wrapper/src/neo4j_viz/gds.py | 2 +- .../src/neo4j_viz/visualization_graph.py | 2 +- python-wrapper/tests/test_colors.py | 2 +- 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/docs/source/customizing.rst b/docs/source/customizing.rst index 37918c8e..35abad4e 100644 --- a/docs/source/customizing.rst +++ b/docs/source/customizing.rst @@ -23,11 +23,11 @@ If you have not yet created a ``VisualizationGraph`` object, please refer to one Coloring nodes -------------- -Nodes can be colored directly by providing them with a color property, upon creation. +Nodes can be colored directly by providing them with a color field, upon creation. This can for example be done by passing a color as a string to the ``color`` parameter of the :doc:`Node <./api-reference/node>` object. -Alternatively, you can color nodes based on a property (field) of the nodes after a ``VisualizationGraph`` object has been +Alternatively, you can color nodes based on a field or property of the nodes after a ``VisualizationGraph`` object has been created. 
@@ -35,12 +35,13 @@ The ``color_nodes`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~ By calling the :meth:`neo4j_viz.VisualizationGraph.color_nodes` method, you can color nodes based on a -node property (field). -It's possible to color the nodes based on a discrete or continuous property. -In the discrete case, a new color from the ``colors`` provided is assigned to each unique value of the node property. -In the continuous case, the ``colors`` should be a list of colors representing a range that are used to create a gradient of colors based on the values of the node property. +node field or property (members of the ``Node.properties`` map). +It's possible to color the nodes based on a discrete or continuous color space. In the discrete case, a new +color from the ``colors`` provided is assigned to each unique value of the node field/property. +In the continuous case, the ``colors`` should be a list of colors representing a range that are used to +create a gradient of colors based on the values of the node field/property. -By default the Neo4j color palette that works for both light and dark mode will be used. +By default the Neo4j color palette, which works for both light and dark mode, will be used. If you want to use a different color palette, you can pass a dictionary or iterable of colors as the ``colors`` parameter. A color value can for example be either strings like "blue", or hexadecimal color codes like "#FF0000", or even a tuple of RGB values like (255, 0, 255). @@ -52,17 +53,17 @@ parameter. By discrete node property (field) ********************************* -To not use the default colors, we can provide a list of custom colors based on the discrete node property (field) "caption" to the ``color_nodes`` method: +To not use the default colors, we can provide a list of custom colors based on the discrete node field "caption" to the ``color_nodes`` method: ..
code-block:: python - from neo4j_viz.colors import PropertyType + from neo4j_viz.colors import ColorSpace # VG is a VisualizationGraph object VG.color_nodes( - "caption", + field="caption", - ["red", "#7fffd4", (255, 255, 255, 0.5), "hsl(270, 60%, 70%)"], + colors=["red", "#7fffd4", (255, 255, 255, 0.5), "hsl(270, 60%, 70%)"], - property_type=PropertyType.DISCRETE + color_space=ColorSpace.DISCRETE ) The full set of allowed values for colors are listed `here `_. @@ -75,12 +76,12 @@ this snippet: from palettable.wesanderson import Moonrise1_5 # VG is a VisualizationGraph object - VG.color_nodes("caption", Moonrise1_5.colors) # PropertyType.DISCRETE is default + VG.color_nodes(field="caption", colors=Moonrise1_5.colors) # ColorSpace.DISCRETE is default -In this case, all nodes with the same caption will get the same color. +In these cases, all nodes with the same caption will get the same color. -If there are fewer colors that unique values for the node ``property`` provided, the colors will be reused in a cycle. -To avoid that, you could use another palette or extend one with additional colors. Please refer to the +If there are fewer colors than unique values for the node ``field`` or ``property`` provided, the colors will be reused in a cycle. +To avoid that, you could use a larger palette or extend one with additional colors. Please refer to the :doc:`Visualizing Neo4j Graph Data Science (GDS) Graphs tutorial <./tutorials/gds-example>` for an example on how to do the latter. @@ -96,9 +97,9 @@ To not use the default colors, we can provide a list of custom colors representi # VG is a VisualizationGraph object VG.color_nodes( - "centrality_score", + property="centrality_score", - [(255, 0, 0), (191, 64, 0), (128, 128, 0), (64, 191, 0), (0, 255, 0)] # From red to green + colors=[(255, 0, 0), (191, 64, 0), (128, 128, 0), (64, 191, 0), (0, 255, 0)], # From red to green - property_type=PropertyType.CONTINUOUS + color_space=ColorSpace.CONTINUOUS ) In this case, the nodes will be colored based on the value of the "centrality_score" property, with the lowest values being colored red and the highest values being colored green.
@@ -110,7 +111,7 @@ Since we only provided five colors in the range, the granularity of the gradient Sizing nodes ------------ -Nodes can be given a size directly by providing them with a size property, upon creation. +Nodes can be given a size directly by providing them with a size field, upon creation. This can for example be done by passing a size as an integer to the ``size`` parameter of the :doc:`Node <./api-reference/node>` object. @@ -178,7 +179,7 @@ In the following example, we pin the node with ID 1337 and unpin the node with I Direct modification of nodes and relationships ---------------------------------------------- -Nodes and relationships can also be modified directly by accessing the ``nodes`` and ``relationships`` attributes of an +Nodes and relationships can also be modified directly by accessing the ``nodes`` and ``relationships`` fields of an existing ``VisualizationGraph`` object. These attributes list of all the :doc:`Nodes <./api-reference/node>` and :doc:`Relationships <./api-reference/relationship>` in the graph, respectively. @@ -189,6 +190,7 @@ Each node and relationship has attributes that can be accessed and modified dire # VG is a VisualizationGraph object VG.nodes[0].size = 10 + VG.nodes[0].properties["height"] = 170 VG.relationships[4].caption = "BUYS" Any changes made to the nodes and relationships will be reflected in the next rendering of the graph. 
diff --git a/examples/streamlit-example.py b/examples/streamlit-example.py index 098be854..a0c3e491 100644 --- a/examples/streamlit-example.py +++ b/examples/streamlit-example.py @@ -25,7 +25,7 @@ def create_visualization_graph() -> VisualizationGraph: nodes_df.drop(columns="features", inplace=True) VG = from_dfs(nodes_df, rels_df) - VG.color_nodes("subject") + VG.color_nodes(property="subject") return VG diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index 75c2052c..7be4151d 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -38,7 +38,7 @@ def from_gds( All `additional_node_properties` will be included in the visualization graph. If the properties are named as the fields of the `Node` class, they will be included as top level fields of the created `Node` objects. Otherwise, they will be included in the `properties` dictionary. - Additionally, a new "labels" node property will be added. + Additionally, a new "labels" node property will be added, containing the node labels of the node. 
Parameters ---------- diff --git a/python-wrapper/src/neo4j_viz/visualization_graph.py b/python-wrapper/src/neo4j_viz/visualization_graph.py index 1e27e088..cf2d8a76 100644 --- a/python-wrapper/src/neo4j_viz/visualization_graph.py +++ b/python-wrapper/src/neo4j_viz/visualization_graph.py @@ -350,7 +350,7 @@ def _make_hashable(raw_prop: Any) -> Hashable: try: hash(prop) except TypeError: - raise ValueError(f"Unable to convert property '{raw_prop}' of type {type(raw_prop)} to a hashable type") + raise ValueError(f"Unable to convert '{raw_prop}' of type {type(raw_prop)} to a hashable type") assert isinstance(prop, Hashable) diff --git a/python-wrapper/tests/test_colors.py b/python-wrapper/tests/test_colors.py index abe4c224..adc13951 100644 --- a/python-wrapper/tests/test_colors.py +++ b/python-wrapper/tests/test_colors.py @@ -97,7 +97,7 @@ def test_color_nodes_default() -> None: ] VG = VisualizationGraph(nodes=nodes, relationships=[]) - VG.color_nodes("caption") + VG.color_nodes(field="caption") assert VG.nodes[0].color == Color(NEO4J_COLORS_DISCRETE[0]) assert VG.nodes[1].color == Color(NEO4J_COLORS_DISCRETE[1]) From a9cde0d54b082db60f27a0de72e524011e72eec2 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Thu, 17 Apr 2025 10:02:30 +0200 Subject: [PATCH 09/10] Update examples according to `properties` dict --- examples/gds-example.ipynb | 106 +++++++++++++------------------- examples/neo4j-example.ipynb | 30 ++++----- examples/snowpark-example.ipynb | 4 +- 3 files changed, 60 insertions(+), 80 deletions(-) diff --git a/examples/gds-example.ipynb b/examples/gds-example.ipynb index cb8d44ed..78970831 100644 --- a/examples/gds-example.ipynb +++ b/examples/gds-example.ipynb @@ -129,14 +129,14 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", "