From 47592d875cd47bad41914e0b066ad13a2e121da0 Mon Sep 17 00:00:00 2001
From: Adam Schill Collberg <adam.schill.collberg@protonmail.com>
Date: Fri, 27 Jun 2025 09:57:28 +0200
Subject: [PATCH 1/4] Make sure to drop temp internal node props in `from_gds`

---
 changelog.md                        | 2 ++
 python-wrapper/src/neo4j_viz/gds.py | 4 ++++
 python-wrapper/tests/test_gds.py    | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/changelog.md b/changelog.md
index 54e3a0b9..c83f505b 100644
--- a/changelog.md
+++ b/changelog.md
@@ -13,6 +13,8 @@
 
 ## Bug fixes
 
+* Make sure that temporary internal node properties are not included in the visualization output.
+
 
 ## Improvements
 
diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py
index 338de75c..aff8ad78 100644
--- a/python-wrapper/src/neo4j_viz/gds.py
+++ b/python-wrapper/src/neo4j_viz/gds.py
@@ -119,6 +119,10 @@ def from_gds(
             node_properties = [property_name]
 
         node_dfs = _fetch_node_dfs(gds, G_fetched, node_properties, G_fetched.node_labels())
+        if property_name is not None:
+            for df in node_dfs.values():
+                df.drop(columns=[property_name], inplace=True)
+
         rel_df = _fetch_rel_df(gds, G_fetched)
     finally:
         if G_fetched.name() != G.name():
diff --git a/python-wrapper/tests/test_gds.py b/python-wrapper/tests/test_gds.py
index fda1caf3..773a21d9 100644
--- a/python-wrapper/tests/test_gds.py
+++ b/python-wrapper/tests/test_gds.py
@@ -276,6 +276,9 @@ def test_from_gds_sample(gds: Any) -> None:
         ):
             VG = from_gds(gds, G)
 
+        # Make sure internal temporary properties are not present
+        assert set(VG.nodes[0].properties.keys()) == {"labels"}
+
         assert len(VG.nodes) >= 9_500
         assert len(VG.nodes) <= 10_500
         assert len(VG.relationships) >= 9_500

From 4677de6c5777a9744bd090c3a386d37076c0f269 Mon Sep 17 00:00:00 2001
From: Adam Schill Collberg <adam.schill.collberg@protonmail.com>
Date: Fri, 27 Jun 2025 14:35:45 +0200
Subject: [PATCH 2/4] Allow entities with different property sets in `from_gds`
 loader

---
 changelog.md                           |  4 +-
 python-wrapper/src/neo4j_viz/gds.py    | 58 +++++++++++++-----------
 python-wrapper/src/neo4j_viz/pandas.py | 23 +++++++---
 python-wrapper/tests/test_gds.py       | 61 ++++++++++++++++++++++++++
 4 files changed, 115 insertions(+), 31 deletions(-)

diff --git a/changelog.md b/changelog.md
index c83f505b..907a45f0 100644
--- a/changelog.md
+++ b/changelog.md
@@ -9,11 +9,13 @@
 ## New features
 
 * Allow passing a `neo4j.Driver` instance as input to `from_neo4j`, in which case the driver will be used internally to fetch the graph data using a simple query
+* Added optional argument `dropna` to `from_dfs` loader allowing for not including NaN properties in the created visualization graph
 
 
 ## Bug fixes
 
-* Make sure that temporary internal node properties are not included in the visualization output.
+* Make sure that temporary internal node properties are not included in the visualization output
+* Fixed a bug where loading a graph with `from_gds` would result in an error if some node or relationship properties were not present on every entity
 
 
 ## Improvements
diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py
index aff8ad78..4aba8ec4 100644
--- a/python-wrapper/src/neo4j_viz/gds.py
+++ b/python-wrapper/src/neo4j_viz/gds.py
@@ -14,11 +14,11 @@
 
 
 def _fetch_node_dfs(
-    gds: GraphDataScience, G: Graph, node_properties: list[str], node_labels: list[str]
+    gds: GraphDataScience, G: Graph, node_properties_by_label: dict[str, list[str]], node_labels: list[str]
 ) -> dict[str, pd.DataFrame]:
     return {
         lbl: gds.graph.nodeProperties.stream(
-            G, node_properties=node_properties, node_labels=[lbl], separate_property_columns=True
+            G, node_properties=node_properties_by_label[lbl], node_labels=[lbl], separate_property_columns=True
         )
         for lbl in node_labels
     }
@@ -79,24 +79,31 @@ def from_gds(
     """
     node_properties_from_gds = G.node_properties()
     assert isinstance(node_properties_from_gds, pd.Series)
-    actual_node_properties = list(chain.from_iterable(node_properties_from_gds.to_dict().values()))
+    actual_node_properties = node_properties_from_gds.to_dict()
+    all_actual_node_properties = list(chain.from_iterable(actual_node_properties.values()))
 
-    if size_property is not None and size_property not in actual_node_properties:
-        raise ValueError(f"There is no node property '{size_property}' in graph '{G.name()}'")
+    if size_property is not None:
+        if size_property not in all_actual_node_properties:
+            raise ValueError(f"There is no node property '{size_property}' in graph '{G.name()}'")
 
     if additional_node_properties is None:
-        additional_node_properties = actual_node_properties
+        node_properties_by_label = {k: set(v) for k, v in actual_node_properties.items()}
     else:
         for prop in additional_node_properties:
-            if prop not in actual_node_properties:
+            if prop not in all_actual_node_properties:
                 raise ValueError(f"There is no node property '{prop}' in graph '{G.name()}'")
 
-    node_properties = set()
-    if additional_node_properties is not None:
-        node_properties.update(additional_node_properties)
+        node_properties_by_label = {}
+        for label, props in actual_node_properties.items():
+            node_properties_by_label[label] = {
+                prop for prop in actual_node_properties[label] if prop in additional_node_properties
+            }
+
     if size_property is not None:
-        node_properties.add(size_property)
-    node_properties = list(node_properties)
+        for label, props in node_properties_by_label.items():
+            props.add(size_property)
+
+    node_properties_by_label = {k: list(v) for k, v in node_properties_by_label.items()}
 
     node_count = G.node_count()
     if node_count > max_node_count:
@@ -112,13 +119,14 @@ def from_gds(
     property_name = None
     try:
         # Since GDS does not allow us to only fetch node IDs, we add the degree property
-        # as a temporary property to ensure that we have at least one property to fetch
-        if len(actual_node_properties) == 0:
+        # as a temporary property to ensure that there is at least one property to fetch for each label
+        if sum([len(props) == 0 for props in node_properties_by_label.values()]) > 0:
             property_name = f"neo4j-viz_property_{uuid4()}"
             gds.degree.mutate(G_fetched, mutateProperty=property_name)
-            node_properties = [property_name]
+            for props in node_properties_by_label.values():
+                props.append(property_name)
 
-        node_dfs = _fetch_node_dfs(gds, G_fetched, node_properties, G_fetched.node_labels())
+        node_dfs = _fetch_node_dfs(gds, G_fetched, node_properties_by_label, G_fetched.node_labels())
         if property_name is not None:
             for df in node_dfs.values():
                 df.drop(columns=[property_name], inplace=True)
@@ -131,35 +139,35 @@ def from_gds(
             gds.graph.nodeProperties.drop(G_fetched, node_properties=[property_name])
 
     for df in node_dfs.values():
-        df.rename(columns={"nodeId": "id"}, inplace=True)
         if property_name is not None and property_name in df.columns:
             df.drop(columns=[property_name], inplace=True)
-    rel_df.rename(columns={"sourceNodeId": "source", "targetNodeId": "target"}, inplace=True)
 
     node_props_df = pd.concat(node_dfs.values(), ignore_index=True, axis=0).drop_duplicates()
     if size_property is not None:
-        if "size" in actual_node_properties and size_property != "size":
+        if "size" in all_actual_node_properties and size_property != "size":
             node_props_df.rename(columns={"size": "__size"}, inplace=True)
         node_props_df.rename(columns={size_property: "size"}, inplace=True)
 
     for lbl, df in node_dfs.items():
-        if "labels" in actual_node_properties:
+        if "labels" in all_actual_node_properties:
             df.rename(columns={"labels": "__labels"}, inplace=True)
         df["labels"] = lbl
 
-    node_labels_df = pd.concat([df[["id", "labels"]] for df in node_dfs.values()], ignore_index=True, axis=0)
-    node_labels_df = node_labels_df.groupby("id").agg({"labels": list})
+    node_labels_df = pd.concat([df[["nodeId", "labels"]] for df in node_dfs.values()], ignore_index=True, axis=0)
+    node_labels_df = node_labels_df.groupby("nodeId").agg({"labels": list})
 
-    node_df = node_props_df.merge(node_labels_df, on="id")
+    node_df = node_props_df.merge(node_labels_df, on="nodeId")
 
-    if "caption" not in actual_node_properties:
+    if "caption" not in all_actual_node_properties:
         node_df["caption"] = node_df["labels"].astype(str)
 
     if "caption" not in rel_df.columns:
         rel_df["caption"] = rel_df["relationshipType"]
 
     try:
-        return _from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"})
+        return _from_dfs(
+            node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}, dropna=True
+        )
     except ValueError as e:
         err_msg = str(e)
         if "column" in err_msg:
diff --git a/python-wrapper/src/neo4j_viz/pandas.py b/python-wrapper/src/neo4j_viz/pandas.py
index 15e29c0e..761a2c63 100644
--- a/python-wrapper/src/neo4j_viz/pandas.py
+++ b/python-wrapper/src/neo4j_viz/pandas.py
@@ -31,8 +31,9 @@ def _from_dfs(
     rel_dfs: DFS_TYPE,
     node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
     rename_properties: Optional[dict[str, str]] = None,
+    dropna: bool = False,
 ) -> VisualizationGraph:
-    relationships = _parse_relationships(rel_dfs, rename_properties=rename_properties)
+    relationships = _parse_relationships(rel_dfs, rename_properties=rename_properties, dropna=dropna)
 
     if node_dfs is None:
         has_size = False
@@ -42,7 +43,7 @@ def _from_dfs(
             node_ids.add(rel.target)
         nodes = [Node(id=id) for id in node_ids]
     else:
-        nodes, has_size = _parse_nodes(node_dfs, rename_properties=rename_properties)
+        nodes, has_size = _parse_nodes(node_dfs, rename_properties=rename_properties, dropna=dropna)
 
     VG = VisualizationGraph(nodes=nodes, relationships=relationships)
 
@@ -52,7 +53,9 @@ def _from_dfs(
     return VG
 
 
-def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]) -> tuple[list[Node], bool]:
+def _parse_nodes(
+    node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]], dropna: bool = False
+) -> tuple[list[Node], bool]:
     if isinstance(node_dfs, DataFrame):
         node_dfs_iter: Iterable[DataFrame] = [node_dfs]
     elif node_dfs is None:
@@ -67,6 +70,8 @@ def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]
     for node_df in node_dfs_iter:
         has_size &= "size" in node_df.columns
         for _, row in node_df.iterrows():
+            if dropna:
+                row.dropna(inplace=True)
             top_level = {}
             properties = {}
             for key, value in row.to_dict().items():
@@ -85,7 +90,9 @@ def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]
     return nodes, has_size
 
 
-def _parse_relationships(rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]) -> list[Relationship]:
+def _parse_relationships(
+    rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]], dropna: bool = False
+) -> list[Relationship]:
     all_rel_field_aliases = Relationship.all_validation_aliases()
 
     if isinstance(rel_dfs, DataFrame):
@@ -96,6 +103,8 @@ def _parse_relationships(rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str
 
     for rel_df in rel_dfs_iter:
         for _, row in rel_df.iterrows():
+            if dropna:
+                row.dropna(inplace=True)
             top_level = {}
             properties = {}
             for key, value in row.to_dict().items():
@@ -118,6 +127,7 @@ def from_dfs(
     node_dfs: Optional[DFS_TYPE],
     rel_dfs: DFS_TYPE,
     node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
+    dropna: bool = False,
 ) -> VisualizationGraph:
     """
     Create a VisualizationGraph from pandas DataFrames representing a graph.
@@ -136,6 +146,9 @@ def from_dfs(
     node_radius_min_max : tuple[float, float], optional
         Minimum and maximum node radius.
         To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range.
+    dropna : bool, optional
+        If True, NaN values will be dropped from the DataFrames before processing.
+        Defaults to False.
     """
 
-    return _from_dfs(node_dfs, rel_dfs, node_radius_min_max)
+    return _from_dfs(node_dfs, rel_dfs, node_radius_min_max, dropna=dropna)
diff --git a/python-wrapper/tests/test_gds.py b/python-wrapper/tests/test_gds.py
index 773a21d9..75f87471 100644
--- a/python-wrapper/tests/test_gds.py
+++ b/python-wrapper/tests/test_gds.py
@@ -283,3 +283,64 @@ def test_from_gds_sample(gds: Any) -> None:
         assert len(VG.nodes) <= 10_500
         assert len(VG.relationships) >= 9_500
         assert len(VG.relationships) <= 10_500
+
+
+@pytest.mark.requires_neo4j_and_gds
+def test_from_gds_hetero(gds: Any) -> None:
+    from neo4j_viz.gds import from_gds
+
+    A_nodes = pd.DataFrame(
+        {
+            "nodeId": [0, 1],
+            "labels": ["A", "A"],
+            "component": [1, 2],
+        }
+    )
+    B_nodes = pd.DataFrame(
+        {
+            "nodeId": [2, 3],
+            "labels": ["B", "B"],
+            # No 'component' property
+        }
+    )
+    rels = pd.DataFrame(
+        {
+            "sourceNodeId": [0, 1],
+            "targetNodeId": [2, 3],
+            "weight": [0.5, 1.5],
+            "relationshipType": ["REL", "REL2"],
+        }
+    )
+
+    with gds.graph.construct("flo", [A_nodes, B_nodes], rels) as G:
+        VG = from_gds(
+            gds,
+            G,
+        )
+
+        assert len(VG.nodes) == 4
+        assert sorted(VG.nodes, key=lambda x: x.id) == [
+            Node(id=0, caption="['A']", properties=dict(labels=["A"], component=float(1))),
+            Node(id=1, caption="['A']", properties=dict(labels=["A"], component=float(2))),
+            Node(id=2, caption="['B']", properties=dict(labels=["B"])),
+            Node(id=3, caption="['B']", properties=dict(labels=["B"])),
+        ]
+
+        assert len(VG.relationships) == 2
+        vg_rels = sorted(
+            [
+                (
+                    e.source,
+                    e.target,
+                    e.caption,
+                    e.properties["relationshipType"],
+                    e.properties["weight"],
+                )
+                for e in VG.relationships
+            ],
+            key=lambda x: x[0],
+        )
+        assert vg_rels == [
+            (0, 2, "REL", "REL", 0.5),
+            (1, 3, "REL2", "REL2", 1.5),
+        ]

From 297cf7d0d28965ec8a183701fcdeb562cf56a3d7 Mon Sep 17 00:00:00 2001
From: Adam Schill Collberg <adam.schill.collberg@protonmail.com>
Date: Tue, 1 Jul 2025 09:56:49 +0200
Subject: [PATCH 3/4] Address review comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Florentin Dörre <florentin.dorre@neotechnology.com>
---
 changelog.md                           |  1 -
 python-wrapper/src/neo4j_viz/pandas.py | 10 +++-------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/changelog.md b/changelog.md
index 907a45f0..762168aa 100644
--- a/changelog.md
+++ b/changelog.md
@@ -9,7 +9,6 @@
 ## New features
 
 * Allow passing a `neo4j.Driver` instance as input to `from_neo4j`, in which case the driver will be used internally to fetch the graph data using a simple query
-* Added optional argument `dropna` to `from_dfs` loader allowing for not including NaN properties in the created visualization graph
 
 
 ## Bug fixes
diff --git a/python-wrapper/src/neo4j_viz/pandas.py b/python-wrapper/src/neo4j_viz/pandas.py
index 761a2c63..b07d9c39 100644
--- a/python-wrapper/src/neo4j_viz/pandas.py
+++ b/python-wrapper/src/neo4j_viz/pandas.py
@@ -71,7 +71,7 @@ def _parse_nodes(
         has_size &= "size" in node_df.columns
         for _, row in node_df.iterrows():
             if dropna:
-                row.dropna(inplace=True)
+                row = row.dropna(inplace=False)
             top_level = {}
             properties = {}
             for key, value in row.to_dict().items():
@@ -104,7 +104,7 @@ def _parse_relationships(
     for rel_df in rel_dfs_iter:
         for _, row in rel_df.iterrows():
             if dropna:
-                row.dropna(inplace=True)
+                row = row.dropna(inplace=False)
             top_level = {}
             properties = {}
             for key, value in row.to_dict().items():
@@ -127,7 +127,6 @@ def from_dfs(
     node_dfs: Optional[DFS_TYPE],
     rel_dfs: DFS_TYPE,
     node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
-    dropna: bool = False,
 ) -> VisualizationGraph:
     """
     Create a VisualizationGraph from pandas DataFrames representing a graph.
@@ -146,9 +145,6 @@ def from_dfs(
     node_radius_min_max : tuple[float, float], optional
         Minimum and maximum node radius.
         To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range.
-    dropna : bool, optional
-        If True, NaN values will be dropped from the DataFrames before processing.
-        Defaults to False.
     """
 
-    return _from_dfs(node_dfs, rel_dfs, node_radius_min_max, dropna=dropna)
+    return _from_dfs(node_dfs, rel_dfs, node_radius_min_max, dropna=False)

From b696f77cf5164b91b572ff337c5a6ddbc8092f2c Mon Sep 17 00:00:00 2001
From: Adam Schill Collberg <adam.schill.collberg@protonmail.com>
Date: Tue, 1 Jul 2025 10:07:38 +0200
Subject: [PATCH 4/4] Fix mypy complaints

---
 python-wrapper/tests/conftest.py   | 2 ++
 python-wrapper/tests/gds_helper.py | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python-wrapper/tests/conftest.py b/python-wrapper/tests/conftest.py
index 081a7007..3f7aa6c7 100644
--- a/python-wrapper/tests/conftest.py
+++ b/python-wrapper/tests/conftest.py
@@ -43,7 +43,9 @@ def aura_ds_instance() -> Generator[Any, None, None]:
 
     # setting as environment variables to run notebooks with this connection
     os.environ["NEO4J_URI"] = dbms_connection_info.uri
+    assert isinstance(dbms_connection_info.username, str)
     os.environ["NEO4J_USER"] = dbms_connection_info.username
+    assert isinstance(dbms_connection_info.password, str)
     os.environ["NEO4J_PASSWORD"] = dbms_connection_info.password
     yield dbms_connection_info
 
diff --git a/python-wrapper/tests/gds_helper.py b/python-wrapper/tests/gds_helper.py
index e5fa270d..e5a0d3dc 100644
--- a/python-wrapper/tests/gds_helper.py
+++ b/python-wrapper/tests/gds_helper.py
@@ -62,8 +62,6 @@ def create_aurads_instance(api: AuraApi) -> tuple[str, DbmsConnectionInfo]:
     if wait_result.error:
         raise Exception(f"Error while waiting for instance to be running: {wait_result.error}")
 
-    wait_result.connection_url
-
     return instance_details.id, DbmsConnectionInfo(
         uri=wait_result.connection_url,
         username="neo4j",