From 8c44bb5604c998aebee419a6b8578c1e27013f82 Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 15:26:18 -0700 Subject: [PATCH 01/10] test workflow --- spanner_graphs/conversion.py | 141 ----------------------------------- 1 file changed, 141 deletions(-) delete mode 100644 spanner_graphs/conversion.py diff --git a/spanner_graphs/conversion.py b/spanner_graphs/conversion.py deleted file mode 100644 index d900dbf..0000000 --- a/spanner_graphs/conversion.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2025 Google LLC - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# https://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This module contains implementation to convert columns from -a database column into usable data for building a graph -""" - -from __future__ import annotations -from typing import Any, List, Dict, Tuple -import json - -from spanner_graphs.database import SpannerFieldInfo -from spanner_graphs.graph_entities import Node, Edge -from spanner_graphs.schema_manager import SchemaManager - -def get_nodes_edges(data: Dict[str, List[Any]], fields: List[SpannerFieldInfo], - schema_json: dict = None) -> Tuple[List[Node], List[Edge]]: - """Retrieves the nodes and edges for query results, in the form expected by the Javascript code. - - WARNING: This function is not only used for graph visualization in Spanner, but also BigQuery. - Currently, infrastructure is set up to only run the Spanner tests, so, when changing this function, - you will need to test bigquery manually. - - Instructions: - $ TODO - - Args: - data: A dictionary associating the name of each column with a list of values for that - column at each row. The representation of row values depends on the column type. See - the `fields` argument for details. - fields: A list of database.SpannerFieldInfo objects specifying the name and type - of each in the query result. This list must contain exactly one item per dictionary - entry in `data`, with each list item set as follow: - - `name` is the name of the column. This must match exactly a dictionary key in `data`. - - `typename` specifies the type of the column, which determines how row values for this - column are represented in `data[name]`. Possible `typename` values are as follows: - - "JSON": - Indicates that the column is SQL type `JSON`. Each row value in `data` is a - dictionary associating JSON field names with their corresponding field values, - normally the result of `json.loads()`. To be supported for visualization, the JSON - schema must result in the following fields being present in the dictionary: - - "kind": 'node' or 'edge', depending on whether the object represents a node or edge. - - "identifier": A string representing a unique identifier for the node or edge. - - "labels" A list of strings representing labels associated with the node or edge. - - "properties": A dictionary specifying additional properties of the node or edge. - - "source_node_identifier": (edges only) Specifies `identifier` value for the - corresponding source node. - - "destination_node_identifier": (edges only) Specifies the `identifier` value for the - corresponding destination node. - - See Node.is_valid_node_json() and Edge.is_valid_edge_json() for details. - - "ARRAY": - Indicates that the column is SQL type `ARRAY`. Each row value is a list - of JSON objects (see above for the expected content of each JSON object) - - Anything else means that the column is not supported for visualization. (Passing in - unsupported columns is allowed, but such columns are exluded from the results). - schema_json: An optional dictionary describing the graph schema. This may be None in the graph - schema is unknown. Used as the constructor argument to be `SchemaManager` class; see SchemaManager - for details. - - Returns: - - """ - schema_manager = SchemaManager(schema_json) - nodes: List[Node] = [] - edges: List[Edge] = [] - node_identifiers = set() - edge_identifiers = set() - - # Process each column in the data - for field in fields: - column_name = field.name - column_data = data[column_name] - - # Only process JSON and Array of JSON types - if field.typename not in ["JSON", "ARRAY"]: - continue - - # Process each value in the column - for value in column_data: - items_to_process = [] - - # Handle both single JSON and arrays of JSON - if isinstance(value, list): - items_to_process.extend(value) - elif hasattr(value, '_array_value'): - items_to_process.extend(value._array_value) - else: - # Single JSON value - if isinstance(value, dict): - items_to_process.append(value) - elif isinstance(value, str): - try: - items_to_process.append(json.loads(value)) - except json.JSONDecodeError: - continue - - # Process each item - for item in items_to_process: - if not isinstance(item, dict) or "kind" not in item: - continue - - if item["kind"] == "node" and Node.is_valid_node_json(item): - node = Node.from_json(item) - if node.identifier not in node_identifiers: - node.key_property_names = schema_manager.get_key_property_names(node) - nodes.append(node) - node_identifiers.add(node.identifier) - - elif item["kind"] == "edge" and Edge.is_valid_edge_json(item): - edge = Edge.from_json(item) - if edge.identifier not in edge_identifiers: - edges.append(edge) - edge_identifiers.add(edge.identifier) - - # Create placeholder nodes for nodes that were not returned - # from the query but are identified in the edges - missing_node_identifiers = set() - for edge in edges: - if edge.source not in node_identifiers: - missing_node_identifiers.add(edge.source) - if edge.destination not in node_identifiers: - missing_node_identifiers.add(edge.destination) - - for identifier in missing_node_identifiers: - nodes.append(Node.make_intermediate(identifier)) - node_identifiers.add(identifier) - - return nodes, edges From c4e2c1212179e53938320b7ca24431b017e74d75 Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 15:43:25 -0700 Subject: [PATCH 02/10] another commit --- test.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test.md diff --git a/test.md b/test.md new file mode 100644 index 0000000..e69de29 From 9e08633b93d062906930bc28e2c6017a5941738e Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 15:51:20 -0700 Subject: [PATCH 03/10] another commit, again --- test2.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test2.md diff --git a/test2.md b/test2.md new file mode 100644 index 0000000..e69de29 From b06d8cb1f59fcfd55160068eba85e5ab710310b7 Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 15:58:19 -0700 Subject: [PATCH 04/10] another commit, again --- test3.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test3.md diff --git a/test3.md b/test3.md new file mode 100644 index 0000000..e69de29 From e985250c394897e286433932e9981a793942f4df Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 16:03:34 -0700 Subject: [PATCH 05/10] another commit, again --- test3.md | 1 + 1 file changed, 1 insertion(+) diff --git a/test3.md b/test3.md index e69de29..9daeafb 100644 --- a/test3.md +++ b/test3.md @@ -0,0 +1 @@ +test From 662a91495f775b0a89844946af9cc4324da44fad Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 16:14:22 -0700 Subject: [PATCH 06/10] another commit, again --- test3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test3.md b/test3.md index 9daeafb..180cf83 100644 --- a/test3.md +++ b/test3.md @@ -1 +1 @@ -test +test2 From 28aadce0ae01dfafe4bc48192f3e9b2cc1994911 Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 16:20:17 -0700 Subject: [PATCH 07/10] another commit, again --- test3.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test3.md b/test3.md index 180cf83..0ee6fb0 100644 --- a/test3.md +++ b/test3.md @@ -1 +1,3 @@ test2 +test2 +test2 From 4537dacf3e75e3cd348bae5a88754f701879f643 Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 16:23:07 -0700 Subject: [PATCH 08/10] another commit, again --- test3.md | 1 + 1 file changed, 1 insertion(+) diff --git a/test3.md b/test3.md index 0ee6fb0..ab38cff 100644 --- a/test3.md +++ b/test3.md @@ -1,3 +1,4 @@ test2 test2 test2 +test2 From 42813be07d1ad75eff73e0095097e6df9b5eb6d1 Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 17:53:21 -0700 Subject: [PATCH 09/10] another commit, again --- test3.md | 1 + 1 file changed, 1 insertion(+) diff --git a/test3.md b/test3.md index ab38cff..a7c3fbd 100644 --- a/test3.md +++ b/test3.md @@ -2,3 +2,4 @@ test2 test2 test2 test2 +test2 From f456f9450ac2eba69712d896bc66a2bb8957b32f Mon Sep 17 00:00:00 2001 From: cqian23 Date: Wed, 17 Sep 2025 18:00:48 -0700 Subject: [PATCH 10/10] test restore --- spanner_graphs/conversion.py | 142 +++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 spanner_graphs/conversion.py diff --git a/spanner_graphs/conversion.py b/spanner_graphs/conversion.py new file mode 100644 index 0000000..9218ccb --- /dev/null +++ b/spanner_graphs/conversion.py @@ -0,0 +1,142 @@ +# Copyright 2025 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module contains implementation to convert columns from +a database column into usable data for building a graph +""" + +from __future__ import annotations +from typing import Any, List, Dict, Tuple +import json + +from spanner_graphs.database import SpannerFieldInfo +from spanner_graphs.graph_entities import Node, Edge +from spanner_graphs.schema_manager import SchemaManager + +def get_nodes_edges(data: Dict[str, List[Any]], fields: List[SpannerFieldInfo], + schema_json: dict = None) -> Tuple[List[Node], List[Edge]]: + """Retrieves the nodes and edges for query results, in the form expected by the Javascript code. + + WARNING: This function is not only used for graph visualization in Spanner, but also BigQuery. + Currently, infrastructure is set up to only run the Spanner tests, so, when changing this function, + you will need to test bigquery manually. + + Instructions: + $ TODO + + Args: + data: A dictionary associating the name of each column with a list of values for that + column at each row. The representation of row values depends on the column type. See + the `fields` argument for details. + fields: A list of database.SpannerFieldInfo objects specifying the name and type + of each in the query result. This list must contain exactly one item per dictionary + entry in `data`, with each list item set as follow: + - `name` is the name of the column. This must match exactly a dictionary key in `data`. + - `typename` specifies the type of the column, which determines how row values for this + column are represented in `data[name]`. Possible `typename` values are as follows: + - "JSON": + Indicates that the column is SQL type `JSON`. Each row value in `data` is a + dictionary associating JSON field names with their corresponding field values, + normally the result of `json.loads()`. To be supported for visualization, the JSON + schema must result in the following fields being present in the dictionary: + - "kind": 'node' or 'edge', depending on whether the object represents a node or edge. + - "identifier": A string representing a unique identifier for the node or edge. + - "labels" A list of strings representing labels associated with the node or edge. + - "properties": A dictionary specifying additional properties of the node or edge. + - "source_node_identifier": (edges only) Specifies `identifier` value for the + corresponding source node. + - "destination_node_identifier": (edges only) Specifies the `identifier` value for the + corresponding destination node. + + See Node.is_valid_node_json() and Edge.is_valid_edge_json() for details. + - "ARRAY": + Indicates that the column is SQL type `ARRAY`. Each row value is a list + of JSON objects (see above for the expected content of each JSON object) + - Anything else means that the column is not supported for visualization. (Passing in + unsupported columns is allowed, but such columns are exluded from the results). + schema_json: An optional dictionary describing the graph schema. This may be None in the graph + schema is unknown. Used as the constructor argument to be `SchemaManager` class; see SchemaManager + for details. + + Returns: + + """ + schema_manager = SchemaManager(schema_json) + nodes: List[Node] = [] + edges: List[Edge] = [] + node_identifiers = set() + edge_identifiers = set() + + # Process each column in the data + for field in fields: + column_name = field.name + column_data = data[column_name] + + # Only process JSON and Array of JSON types + if field.typename not in ["JSON", "ARRAY"]: + continue + + # Process each value in the column + for value in column_data: + items_to_process = [] + + # Handle both single JSON and arrays of JSON + if isinstance(value, list): + items_to_process.extend(value) + elif hasattr(value, '_array_value'): + items_to_process.extend(value._array_value) + else: + # Single JSON value + if isinstance(value, dict): + items_to_process.append(value) + elif isinstance(value, str): + try: + items_to_process.append(json.loads(value)) + except json.JSONDecodeError: + continue + + # Process each item + for item in items_to_process: + if not isinstance(item, dict) or "kind" not in item: + continue + + if item["kind"] == "node" and Node.is_valid_node_json(item): + node = Node.from_json(item) + if node.identifier not in node_identifiers: + node.key_property_names = schema_manager.get_key_property_names(node) + nodes.append(node) + node_identifiers.add(node.identifier) + + elif item["kind"] == "edge" and Edge.is_valid_edge_json(item): + edge = Edge.from_json(item) + if edge.identifier not in edge_identifiers: + edges.append(edge) + edge_identifiers.add(edge.identifier) + + # Create placeholder nodes for nodes that were not returned + # from the query but are identified in the edges + missing_node_identifiers = set() + for edge in edges: + if edge.source not in node_identifiers: + missing_node_identifiers.add(edge.source) + if edge.destination not in node_identifiers: + missing_node_identifiers.add(edge.destination) + + for identifier in missing_node_identifiers: + nodes.append(Node.make_intermediate(identifier)) + node_identifiers.add(identifier) + + return nodes, edges +