diff --git a/.github/workflows/code-style.yml b/.github/workflows/code-style.yml index 9acfd98e..5ff74165 100644 --- a/.github/workflows/code-style.yml +++ b/.github/workflows/code-style.yml @@ -35,6 +35,7 @@ jobs: - run: pip install ".[dev]" - run: pip install ".[pandas]" - run: pip install ".[gds]" + - run: pip install ".[snowflake]" - name: Check code style run: cd ${GITHUB_WORKSPACE} && ./scripts/checkstyle.sh diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 7b5e2e83..d5fe6e64 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -42,6 +42,7 @@ jobs: - run: pip install ".[pandas]" - run: pip install ".[neo4j]" - run: pip install ".[gds]" + - run: pip install ".[snowflake]" - name: Run tests run: pytest tests/ diff --git a/changelog.md b/changelog.md index 20abd0e8..a6ace320 100644 --- a/changelog.md +++ b/changelog.md @@ -6,11 +6,15 @@ ## New features +* Added new constructor `from_snowflake` that creates visualization graphs from Snowflake tables. + ## Bug fixes ## Improvements +* The `field` parameter of `color_nodes` now also accepts casing other than `snake_case`. + ## Other changes diff --git a/docs/source/api-reference/from_snowflake.rst b/docs/source/api-reference/from_snowflake.rst new file mode 100644 index 00000000..78f7feb7 --- /dev/null +++ b/docs/source/api-reference/from_snowflake.rst @@ -0,0 +1,6 @@ +Import from Snowflake Tables +---------------------------- + +.. automodule:: neo4j_viz.snowflake + :members: + :exclude-members: Orientation, VizProjectConfig, VizRelationshipTableConfig diff --git a/docs/source/api-reference/render_options.rst b/docs/source/api-reference/render_options.rst index b477b3f6..0c1dd13b 100644 --- a/docs/source/api-reference/render_options.rst +++ b/docs/source/api-reference/render_options.rst @@ -18,6 +18,5 @@ .. autoenum:: neo4j_viz.options.Packing :members: - .. 
autoenum:: neo4j_viz.Renderer :members: diff --git a/docs/source/index.rst b/docs/source/index.rst index 09d3b06e..159d1a99 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,8 +11,9 @@ The library allows you to visualize graph data interactively in Python using a s The library wraps the `Neo4j Visualization JavaScript library (NVL) `_, and provides additional features for working with graph data in Python. -Notably, there are convenience methods for importing data from `Pandas DataFrames `_, -`Neo4j Graph Data Science `_ and `Neo4j Database `_. +Notably, there are convenience methods for importing data from source such as `Pandas DataFrames `_, +`Neo4j Graph Data Science `_, `Neo4j Database `_ +and `Snowflake tables `_. The source code is available on `GitHub `_. If you have a suggestion on how we can improve the library or want to report a problem, you can create a `new issue `_. diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 4adf4c4d..c25259a0 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -24,8 +24,9 @@ To install the additional dependencies required for the :doc:`from_dfs importer pip install neo4j-viz[pandas] + Neo4j ``from_neo4j`` importer -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To install the additional dependencies required for the :doc:`from_neo4j importer <./api-reference/from_neo4j>` you can run: @@ -43,6 +44,17 @@ To install the additional dependencies required for the :doc:`from_gds importer pip install neo4j-viz[gds] + +Snowflake tables ``from_snowflake`` importer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To install the additional dependencies required for the :doc:`from_snowflake importer <./api-reference/from_snowflake>` you can run: + +.. 
code-block:: bash + + pip install neo4j-viz[snowflake] + + Notebook tutorials ~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/integration.rst b/docs/source/integration.rst index c4ee07b6..e71609e9 100644 --- a/docs/source/integration.rst +++ b/docs/source/integration.rst @@ -5,8 +5,9 @@ In addition to creating graphs from scratch, with ``neo4j-viz`` as is shown in t :doc:`Getting started section <./getting-started>`, you can also import data directly from external sources. In this section we will cover how to import data from `Pandas DataFrames `_, `Neo4j Graph Data Science `_, -`Neo4j Database `_ and -`GQL CREATE queries `_. +`Neo4j Database `_, +`GQL CREATE queries `_, +and `Snowflake tables `_. .. contents:: On this page: @@ -19,14 +20,14 @@ Pandas DataFrames ----------------- The ``neo4j-viz`` library provides a convenience method for importing data from Pandas DataFrames. -These DataFrames can be created from many sources, such as CSV files or :doc:`Snowflake tables<./tutorials/snowpark-example>`. +These DataFrames can be created from many sources, such as CSV files. It requires and additional dependency to be installed, which you can do by running: .. code-block:: bash pip install neo4j-viz[pandas] -Once you have installed the additional dependency, you can use the :doc:`from_gds <./api-reference/from_pandas>` method +Once you have installed the additional dependency, you can use the :doc:`from_pandas <./api-reference/from_pandas>` method to import pandas DataFrames. The ``from_dfs`` method takes two mandatory positional parameters: @@ -82,9 +83,6 @@ and :doc:`Relationships <./api-reference/relationship>`. VG = from_dfs(nodes, relationships) -For another example of the ``from_dfs`` importer in action, see the -:doc:`Visualizing Snowflake Tables tutorial <./tutorials/snowpark-example>`. - Neo4j Graph Data Science (GDS) library -------------------------------------- @@ -118,9 +116,9 @@ and will be used to determine the sizes of the nodes in the visualization. 
The ``additional_node_properties`` parameter is also optional, and should be a list of additional node properties of the projection that you want to include in the visualization. -The default is `None`, which means that all properties of the nodes in the projection will be included. +The default is ``None``, which means that all properties of the nodes in the projection will be included. Apart from being visible through on-hover tooltips, these properties could be used to color the nodes, or give captions -to them in the visualization, or simply included in the nodes' `Node.properties` maps without directly impacting the +to them in the visualization, or simply included in the nodes' ``Node.properties`` maps without directly impacting the visualization. The last optional property, ``node_radius_min_max``, can be used (and is used by default) to scale the node sizes for @@ -285,3 +283,122 @@ In this small example, we create a visualization graph from a GQL ``CREATE`` que """ VG = from_gql_create(query) + + +Snowflake Tables +---------------- + +The ``neo4j-viz`` library provides a convenience method for importing data from Snowflake tables. +It requires and additional dependency to be installed, which you can do by running: + +.. code-block:: bash + + pip install neo4j-viz[snowflake] + +Once you have installed the additional dependency, you can use the :doc:`from_snowflake <./api-reference/from_snowflake>` method +to import Snowflake tables into a ``VisualizationGraph``. + +The ``from_snowflake`` method takes two mandatory positional parameters: + +* A ``snowflake.snowpark.Session`` object for the connection to Snowflake, and +* A `project configuration `_ as a dictionary, that specifies how you want your tables to be projected as a graph. + This configuration is the same as the project configuration of the `Neo4j Snowflake Graph Analytics application `_. 
+ +``from_snowflake`` also takes an optional property, ``node_radius_min_max``, that can be used (and is used by default) to +scale the node sizes for the visualization. +It is a tuple of two numbers, representing the radii (sizes) in pixels of the smallest and largest nodes respectively in +the visualization. +The node sizes will be scaled such that the smallest node will have the size of the first value, and the largest node +will have the size of the second value. +The other nodes will be scaled linearly between these two values according to their relative size. +This can be useful if node sizes vary a lot, or are all very small or very big. + + +Special columns +~~~~~~~~~~~~~~~ + +It is possible to modify the visualization directly by including columns of certain specific names in the node and relationship tables. + +All such special columns can be found :doc:`here <./api-reference/node>` for nodes and :doc:`here <./api-reference/relationship>` for relationships. +Though listed in ``snake_case`` here, ``SCREAMING_SNAKE_CASE`` and ``camelCase`` are also supported. +Some of the most commonly used special columns are: + +* **Node sizes**: The sizes of nodes can be controlled by including a column named "SIZE" in node tables. + The values in these columns should be of a numeric type. This can be useful for visualizing the relative importance or size of nodes in the graph, for example using a computed centrality score. + +* **Captions**: The caption text of nodes and relationships can be controlled by including a column named "CAPTION" in the tables. + The values in these columns should be of a string type. This can be useful for displaying additional information about the nodes, such as their names or labels. If no "CAPTION" column is provided, the default captions in the visualization will be the names of the corresponding node and relationship tables. 
+ +Please also note that you can further customize the visualization after the `VisualizationGraph` has been created, by using the methods described in the :doc:`Customizing the visualization <./customizing>` section. + + +Default behavior +~~~~~~~~~~~~~~~~ + +Unless there are "CAPTION" columns in the tables, the node and relationship captions will be set to the names of the corresponding tables. +Similarly, if there are are no "COLOR" node table columns, the nodes will be colored be colored so that nodes from the same table have the same color, and different tables have different colors. + + +Example +~~~~~~~ + +In this small example, we import a toy graph representing a social network from two tables in Snowflake. + +.. code-block:: python + + from snowflake.snowpark import Session + from neo4j_viz.snowflake import from_dfs + + # Configure according to your own setup + connection_parameters = { + "account": os.environ.get("SNOWFLAKE_ACCOUNT"), + "user": os.environ.get("SNOWFLAKE_USER"), + "password": os.environ.get("SNOWFLAKE_PASSWORD"), + "role": os.environ.get("SNOWFLAKE_ROLE"), + "warehouse": os.environ.get("SNOWFLAKE_WAREHOUSE"), + } + + session.sql( + "CREATE OR REPLACE TABLE EXAMPLE_DB.DATA_SCHEMA.PERSONS (NODEID VARCHAR);" + ).collect() + + session.sql(""" + INSERT INTO EXAMPLE_DB.DATA_SCHEMA.PERSONS VALUES + ('Alice'), + ('Bob'), + ('Carol'), + ('Dave'), + ('Eve'); + """).collect() + + session.sql( + "CREATE OR REPLACE TABLE EXAMPLE_DB.DATA_SCHEMA.KNOWS (SOURCENODEID VARCHAR, TARGETNODEID VARCHAR);" + ).collect() + + session.sql(""" + INSERT INTO EXAMPLE_DB.DATA_SCHEMA.KNOWS VALUES + ('Alice', 'Dave'), + ('Alice', 'Carol'), + ('Bob', 'Carol'), + ('Dave', 'Eve'), + """).collect() + + VG = from_snowflake( + session, + { + "nodeTables": [ + "EXAMPLE_DB.DATA_SCHEMA.PERSONS", + ], + "relationshipTables": { + "EXAMPLE_DB.DATA_SCHEMA.KNOWS": { + "sourceTable": "EXAMPLE_DB.DATA_SCHEMA.PERSONS", + "targetTable": "EXAMPLE_DB.DATA_SCHEMA.PERSONS", + "orientation": 
"UNDIRECTED", + } + }, + }, + ) + +For a full example of the ``from_snowflake`` importer in action, please see the +:doc:`Visualizing Snowflake Tables tutorial <./tutorials/snowflake-example>`. + diff --git a/docs/source/tutorials/snowflake-example.nblink b/docs/source/tutorials/snowflake-example.nblink new file mode 100644 index 00000000..8899606b --- /dev/null +++ b/docs/source/tutorials/snowflake-example.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/snowflake-example.ipynb" +} diff --git a/docs/source/tutorials/snowpark-example.nblink b/docs/source/tutorials/snowpark-example.nblink deleted file mode 100644 index d812a4df..00000000 --- a/docs/source/tutorials/snowpark-example.nblink +++ /dev/null @@ -1,3 +0,0 @@ -{ - "path": "../../../examples/snowpark-example.ipynb" -} diff --git a/examples/snowflake-example.ipynb b/examples/snowflake-example.ipynb new file mode 100644 index 00000000..7d23be78 --- /dev/null +++ b/examples/snowflake-example.ipynb @@ -0,0 +1,554 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0d3ffc27", + "metadata": {}, + "source": [ + "# Visualizing Snowflake Tables as a Graph" + ] + }, + { + "cell_type": "markdown", + "id": "6b83277d", + "metadata": {}, + "source": [ + "This is a brief but complete example of how to visualize graphs represented by tables in Snowflake, using the Graph Visualization for Python library for Neo4j.\n", + "The API for this is based on how one defines graph projections for the [Neo4j Graph Analytics for Snowflake application](https://neo4j.com/docs/snowflake-graph-analytics/current/)." + ] + }, + { + "cell_type": "markdown", + "id": "168b2f0ec9520f4a", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "We will start by installing the necessary Python library requirements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39e8a71b", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install neo4j-viz[snowflake]" + ] + }, + { + "cell_type": "markdown", + "id": "c91214441edff2d", + "metadata": {}, + "source": [ + "We can now proceed to set up our connection to Snowflake by initializing a new session.\n", + "Please note that you may need more or fewer connection parameters depending on your Snowflake configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "801d0bed", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from snowflake.snowpark import Session\n", + "\n", + "# Configure according to your own setup\n", + "connection_parameters = {\n", + " \"account\": os.environ.get(\"SNOWFLAKE_ACCOUNT\"),\n", + " \"user\": os.environ.get(\"SNOWFLAKE_USER\"),\n", + " \"password\": os.environ.get(\"SNOWFLAKE_PASSWORD\"),\n", + " \"role\": os.environ.get(\"SNOWFLAKE_ROLE\"),\n", + " \"warehouse\": os.environ.get(\"SNOWFLAKE_WAREHOUSE\"),\n", + "}\n", + "\n", + "session = Session.builder.configs(connection_parameters).create()" + ] + }, + { + "cell_type": "markdown", + "id": "365a1c31", + "metadata": {}, + "source": [ + "## Creating tables\n", + "\n", + "In order to have something to visualize, we will now proceed to create a small example graph, represented by tables in Snowflake.\n", + "The first table we create will represent person nodes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d935b3d4", + "metadata": {}, + "outputs": [], + "source": [ + "session.sql(\n", + " \"CREATE OR REPLACE TABLE EXAMPLE_DB.DATA_SCHEMA.PERSONS (NODEID VARCHAR);\"\n", + ").collect()\n", + "\n", + "session.sql(\"\"\"\n", + "INSERT INTO EXAMPLE_DB.DATA_SCHEMA.PERSONS VALUES\n", + " ('Alice'),\n", + " ('Bob'),\n", + " ('Carol'),\n", + " ('Dave'),\n", + " ('Eve');\n", + " \"\"\").collect()" + ] + }, + { + "cell_type": "markdown", + "id": "427516c826c98949", + "metadata": {}, + "source": [ + "The second table we create will also be one of nodes, but this time representing musical instruments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "512a23dea8cdc194", + "metadata": {}, + "outputs": [], + "source": [ + "session.sql(\n", + " \"CREATE OR REPLACE TABLE EXAMPLE_DB.DATA_SCHEMA.INSTRUMENTS (NODEID VARCHAR);\"\n", + ").collect()\n", + "\n", + "session.sql(\"\"\"\n", + " INSERT INTO EXAMPLE_DB.DATA_SCHEMA.INSTRUMENTS VALUES\n", + " ('Guitar'),\n", + " ('Synthesizer'),\n", + " ('Bongos'),\n", + " ('Trumpet');\n", + " \"\"\").collect()" + ] + }, + { + "cell_type": "markdown", + "id": "cf08716eb4275659", + "metadata": {}, + "source": [ + "In order to make a graph out of this, we should have some relations connecting nodes together.\n", + "The following table contains relationships from person to instrument nodes, representing that persons liking instruments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be2ac16d3bd41e6", + "metadata": {}, + "outputs": [], + "source": [ + "session.sql(\n", + " \"CREATE OR REPLACE TABLE EXAMPLE_DB.DATA_SCHEMA.LIKES (SOURCENODEID VARCHAR, TARGETNODEID VARCHAR);\"\n", + ").collect()\n", + "\n", + "session.sql(\"\"\"\n", + "INSERT INTO EXAMPLE_DB.DATA_SCHEMA.LIKES VALUES\n", + " ('Alice', 'Guitar'),\n", + " ('Alice', 'Synthesizer'),\n", + " ('Alice', 'Bongos'),\n", + " ('Bob', 'Guitar'),\n", + " ('Bob', 'Synthesizer'),\n", + " ('Carol', 'Bongos'),\n", + " ('Dave', 'Guitar'),\n", + " ('Dave', 'Trumpet'),\n", + " ('Dave', 'Bongos');\n", + " \"\"\").collect()" + ] + }, + { + "cell_type": "markdown", + "id": "aefcf88c33474c11", + "metadata": {}, + "source": [ + "## Creating the Visualization Graph\n", + "\n", + "Now that we have our data set, we are ready to generate a `VisualizationGraph` that we can subsequently render.\n", + "We do so with a call go the `from_snowflake` convenience constructor of the `neo4j-viz` library.\n", + "Along with our Snowflake session, the input is a [project configuration](https://neo4j.com/docs/snowflake-graph-analytics/current/jobs/#jobs-project) that defines how we want the tables to be represented as a graph.\n", + "The project configuration syntax is the same as in the Neo4j Graph Analytics for Snowflake application." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bcfab01-867b-4dd4-a6dc-941ff71f5a2b", + "metadata": {}, + "outputs": [], + "source": [ + "from neo4j_viz.snowflake import from_snowflake\n", + "\n", + "VG = from_snowflake(\n", + " session,\n", + " {\n", + " \"nodeTables\": [\n", + " \"EXAMPLE_DB.DATA_SCHEMA.PERSONS\",\n", + " \"EXAMPLE_DB.DATA_SCHEMA.INSTRUMENTS\",\n", + " ],\n", + " \"relationshipTables\": {\n", + " \"EXAMPLE_DB.DATA_SCHEMA.LIKES\": {\n", + " \"sourceTable\": \"EXAMPLE_DB.DATA_SCHEMA.PERSONS\",\n", + " \"targetTable\": \"EXAMPLE_DB.DATA_SCHEMA.INSTRUMENTS\",\n", + " }\n", + " },\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2cd47fba794b9b6d", + "metadata": {}, + "source": [ + "To sanity check our visualization graph, let us count that the number of relationships is indeed nine." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5922db83623e011", + "metadata": {}, + "outputs": [], + "source": [ + "len(VG.relationships)" + ] + }, + { + "cell_type": "markdown", + "id": "950e0e76cfcaf3d6", + "metadata": {}, + "source": [ + "## Rendering the visualization" + ] + }, + { + "cell_type": "markdown", + "id": "da39f29deb1569e2", + "metadata": {}, + "source": [ + "Let us now render our visualization graph, using only default render options." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e8b0f4c6", + "metadata": { + "tags": [ + "preserve-output" + ] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "VG.render()" + ] + }, + { + "cell_type": "markdown", + "id": "1a422eb76831c051", + "metadata": {}, + "source": [ + "The graph renders nicely, and we see that our two node types, \"PERSONS\" and \"INSTRUMENTS\", are colored and captioned differently.\n", + "By default, table names will determine both node and relationship captions, as well as the node coloring.\n", + "We can also see this in that relationships are rendered as arrows with the \"LIKES\" caption.\n", + "\n", + "We can zoom in and out (mouse scroll-wheel), pan around, move nodes, and hover over nodes and relationships to see their properties.\n", + "The buttons on the top right also allow us to zoom, in addition to taking PNG snapshots of the graph." + ] + }, + { + "cell_type": "markdown", + "id": "45dddcab-b20f-4848-b091-86d704f7a8f5", + "metadata": {}, + "source": [ + "## Customizing the visualization\n", + "\n", + "If we are not completely satisfied with the graph is rendered, there are ways to customize it.\n", + "For example, we could change it so that every nodes gets its own color, by using the `color_nodes` method.\n", + "By passing it the `property` \"SNOWFLAKEID\", which will be unique for all nodes, it will give each node a new color.\n", + "We also make sure to set `override` to override the default coloring." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a59b91d9cccf9987", + "metadata": { + "tags": [ + "preserve-output" + ] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "VG.color_nodes(property=\"SNOWFLAKEID\", override=True)\n", + "VG.render()" + ] + }, + { + "cell_type": "markdown", + "id": "8aa5576fa3b25383", + "metadata": {}, + "source": [ + "## Cleanup\n", + "\n", + "Lastly, we lets clean up the tables we created and close our Snowflake session." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6eb218e892d420e", + "metadata": {}, + "outputs": [], + "source": [ + "session.sql(\"DROP TABLE IF EXISTS EXAMPLE_DB.DATA_SCHEMA.PERSONS\").collect()\n", + "session.sql(\"DROP TABLE IF EXISTS EXAMPLE_DB.DATA_SCHEMA.INSTRUMENTS\").collect()\n", + "session.sql(\"DROP TABLE IF EXISTS EXAMPLE_DB.DATA_SCHEMA.LIKES\").collect()\n", + "\n", + "session.close()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/snowpark-example.ipynb b/examples/snowpark-example.ipynb deleted file mode 100644 index 647cbfc5..00000000 --- a/examples/snowpark-example.ipynb +++ /dev/null @@ -1,387 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0d3ffc27", - "metadata": {}, - "source": [ - "# Visualizing Snowflake Tables" - ] - }, - { - "cell_type": "markdown", - "id": "6b83277d", - "metadata": {}, - "source": [ - "\n", - "This is a brief but complete example of how to visualize graphs represented by tables in Snowflake, using the Graph Visualization for Python library for Neo4j." - ] - }, - { - "cell_type": "markdown", - "id": "168b2f0ec9520f4a", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "We will start by installing the necessary Python library requirements." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "39e8a71b", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install snowflake-snowpark-python # Requires Python version <= 3.11\n", - "%pip install neo4j-viz" - ] - }, - { - "cell_type": "markdown", - "id": "c91214441edff2d", - "metadata": {}, - "source": [ - "We can now proceed to set up our connection to Snowflake by initializing a new session.\n", - "Please not that you may need more or fewer connection parameters depending on your Snowflake configuration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "801d0bed", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "from snowflake.snowpark import Session\n", - "\n", - "# Configure according to your own setup\n", - "connection_parameters = {\n", - " \"account\": os.environ.get(\"SNOWFLAKE_ACCOUNT\"),\n", - " \"user\": os.environ.get(\"SNOWFLAKE_USER\"),\n", - " \"password\": os.environ.get(\"SNOWFLAKE_PASSWORD\"),\n", - " \"role\": os.environ.get(\"SNOWFLAKE_ROLE\"),\n", - " \"warehouse\": os.environ.get(\"SNOWFLAKE_WAREHOUSE\"),\n", - "}\n", - "\n", - "session = Session.builder.configs(connection_parameters).create()" - ] - }, - { - "cell_type": "markdown", - "id": "5ff57d28a917c569", - "metadata": {}, - "source": [ - "Now can we create a new Snowflake database where we can put our little example tables.\n", - "If you already have a database you want to use, you can skip this step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41ad4289420a9b36", - "metadata": {}, - "outputs": [], - "source": [ - "session.sql(\n", - " \"CREATE DATABASE IF NOT EXISTS nvl_example DATA_RETENTION_TIME_IN_DAYS = 1\"\n", - ").collect()\n", - "session.sql(\"USE DATABASE nvl_example\").collect()" - ] - }, - { - "cell_type": "markdown", - "id": "365a1c31", - "metadata": {}, - "source": [ - "## Creating tables\n", - "\n", - "Next we will create a new table for the nodes in our graph, that will represent products of various categories." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d935b3d4", - "metadata": {}, - "outputs": [], - "source": [ - "session.sql(\n", - " \"CREATE OR REPLACE TABLE products (id INT, name VARCHAR, category INT)\"\n", - ").collect()\n", - "\n", - "session.sql(\"\"\"\n", - "INSERT INTO products VALUES\n", - "(1, 'Product 1', 1),\n", - "(2, 'Product 1A', 1),\n", - "(3, 'Product 1B', 1),\n", - "(4, 'Product 2', 2),\n", - "(5, 'Product 2A', 2),\n", - "(6, 'Product 2B', 2),\n", - "(7, 'Product 3', 3),\n", - "(8, 'Product 3A', 3),\n", - "(9, 'Product 3B', 3),\n", - "(10, 'Product 4', 4),\n", - "(11, 'Product 4A', 4),\n", - "(12, 'Product 4B', 4)\n", - "\"\"\").collect()" - ] - }, - { - "cell_type": "markdown", - "id": "cf08716eb4275659", - "metadata": {}, - "source": [ - "Some of the products, are \"subproducts\" of certain parent products.\n", - "We now create a table that encodes these \"PARENT\" relationships between the products." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be2ac16d3bd41e6", - "metadata": {}, - "outputs": [], - "source": [ - "session.sql(\n", - " \"CREATE OR REPLACE TABLE parents (source INT, target INT, type VARCHAR)\"\n", - ").collect()\n", - "\n", - "session.sql(\"\"\"\n", - "INSERT INTO parents VALUES\n", - "(2, 1, 'PARENT'),\n", - "(3, 1, 'PARENT'),\n", - "(5, 4, 'PARENT'),\n", - "(6, 4, 'PARENT'),\n", - "(8, 7, 'PARENT'),\n", - "(9, 7, 'PARENT'),\n", - "(11, 10, 'PARENT'),\n", - "(12, 10, 'PARENT')\n", - "\"\"\").collect()" - ] - }, - { - "cell_type": "markdown", - "id": "a28bd5aa", - "metadata": {}, - "source": [ - "## Fetching the data\n", - "\n", - "Next we fetch our tables from Snowflake and convert them to pandas DataFrames.\n", - "Additionally, we rename the most of the table columns so that they are named according to the `neo4j-viz` API." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "deb6353193e2338b", - "metadata": {}, - "outputs": [], - "source": [ - "products_df = session.table(\"products\").to_pandas().rename(columns={\"NAME\": \"caption\"})\n", - "parents_df = session.table(\"parents\").to_pandas().rename(columns={\"TYPE\": \"caption\"})\n", - "products_df" - ] - }, - { - "cell_type": "markdown", - "id": "950e0e76cfcaf3d6", - "metadata": {}, - "source": [ - "## Rendering the visualization\n", - "With only one command we can now create a `VisualizationGraph` from these tables representing nodes and relationships.\n", - "In order to enhance the visualization, we will also be utilizing the `color_nodes` function, which will assign a distinct color to each product category." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "887f41b7a243d439", - "metadata": {}, - "outputs": [], - "source": [ - "from neo4j_viz.pandas import from_dfs\n", - "\n", - "VG = from_dfs(products_df, parents_df)\n", - "\n", - "# Using the default Neo4j color scheme\n", - "VG.color_nodes(property=\"CATEGORY\")" - ] - }, - { - "cell_type": "markdown", - "id": "da39f29deb1569e2", - "metadata": {}, - "source": [ - "Let us now render our graph, using only default render options." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e8b0f4c6", - "metadata": { - "tags": [ - "preserve-output" - ] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "
\n", - "
\n", - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "VG.render()" - ] - }, - { - "cell_type": "markdown", - "id": "ac4c5e35a602ede2", - "metadata": {}, - "source": [ - "You can scroll to zoom in and out in the visualization, and click-and-drag nodes to move them." - ] - }, - { - "cell_type": "markdown", - "id": "8aa5576fa3b25383", - "metadata": {}, - "source": [ - "## Cleanup\n", - "\n", - "Lastly, we clean up the example database we created." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6eb218e892d420e", - "metadata": {}, - "outputs": [], - "source": [ - "session.sql(\"DROP DATABASE IF EXISTS nvl_example\").collect()\n", - "session.close()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/python-wrapper/pyproject.toml b/python-wrapper/pyproject.toml index f242463d..dfd0e1b6 100644 --- a/python-wrapper/pyproject.toml +++ b/python-wrapper/pyproject.toml @@ -42,7 +42,7 @@ requires-python = ">=3.9" [project.optional-dependencies] dev = [ "ruff==0.11.8", - "mypy==1.15.0", + "mypy==1.17.1", "pytest==8.3.4", "selenium==4.32.0", "ipykernel==6.29.5", @@ -61,6 +61,7 @@ docs = [ pandas = ["pandas>=2, <3", "pandas-stubs>=2, <3"] gds = ["graphdatascience>=1, <2"] neo4j = ["neo4j"] +snowflake = ["snowflake-snowpark-python>=1, <2"] notebook = [ "ipykernel>=6.29.5", "pykernel>=0.1.6", @@ -68,7 +69,7 @@ notebook = [ "ipywidgets>=8.0.0", "palettable>=3.3.3", "matplotlib>=3.9.4", - "snowflake-snowpark-python==1.26.0", + "snowflake-snowpark-python==1.37.0", ] [project.urls] diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index 1600d62d..310c6214 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -2,7 +2,7 @@ import warnings from itertools import chain -from typing import Optional 
+from typing import Optional, cast from uuid import uuid4 import pandas as pd @@ -81,31 +81,32 @@ def from_gds( """ node_properties_from_gds = G.node_properties() assert isinstance(node_properties_from_gds, pd.Series) - actual_node_properties = node_properties_from_gds.to_dict() + actual_node_properties: dict[str, list[str]] = cast(dict[str, list[str]], node_properties_from_gds.to_dict()) all_actual_node_properties = list(chain.from_iterable(actual_node_properties.values())) if size_property is not None: if size_property not in all_actual_node_properties: raise ValueError(f"There is no node property '{size_property}' in graph '{G.name()}'") + node_properties_by_label_sets: dict[str, set[str]] = dict() if additional_node_properties is None: - node_properties_by_label = {k: set(v) for k, v in actual_node_properties.items()} + node_properties_by_label_sets = {k: set(v) for k, v in actual_node_properties.items()} else: for prop in additional_node_properties: if prop not in all_actual_node_properties: raise ValueError(f"There is no node property '{prop}' in graph '{G.name()}'") - node_properties_by_label = {} for label, props in actual_node_properties.items(): - node_properties_by_label[label] = { + node_properties_by_label_sets[label] = { prop for prop in actual_node_properties[label] if prop in additional_node_properties } if size_property is not None: - for label, props in node_properties_by_label.items(): - props.add(size_property) + # For some reason mypy are unable to understand that this is dict[str, set[str]] + for label, props in node_properties_by_label_sets.items(): # type: ignore + props.add(size_property) # type: ignore - node_properties_by_label = {k: list(v) for k, v in node_properties_by_label.items()} + node_properties_by_label = {k: list(v) for k, v in node_properties_by_label_sets.items()} node_count = G.node_count() if node_count > max_node_count: @@ -148,7 +149,7 @@ def from_gds( if size_property is not None: if "size" in all_actual_node_properties 
and size_property != "size": node_props_df.rename(columns={"size": "__size"}, inplace=True) - if size_property not in additional_node_properties: + if additional_node_properties is not None and size_property not in additional_node_properties: node_props_df.rename(columns={size_property: "size"}, inplace=True) else: node_props_df["size"] = node_props_df[size_property] diff --git a/python-wrapper/src/neo4j_viz/snowflake.py b/python-wrapper/src/neo4j_viz/snowflake.py new file mode 100644 index 00000000..ac237921 --- /dev/null +++ b/python-wrapper/src/neo4j_viz/snowflake.py @@ -0,0 +1,344 @@ +from __future__ import annotations + +from enum import Enum +from typing import Annotated, Any, Optional + +from pandas import DataFrame +from pydantic import ( + AfterValidator, + BaseModel, + BeforeValidator, +) +from pydantic_core.core_schema import ValidationInfo +from snowflake.snowpark import Session +from snowflake.snowpark.exceptions import SnowparkSQLException +from snowflake.snowpark.types import ( + ArrayType, + BooleanType, + ByteType, + DataType, + DateType, + DecimalType, + DoubleType, + FloatType, + GeographyType, + GeometryType, + IntegerType, + LongType, + MapType, + ShortType, + StringType, + StructField, + StructType, + TimestampType, + TimeType, + VariantType, + VectorType, +) + +from neo4j_viz import VisualizationGraph +from neo4j_viz.colors import ColorSpace +from neo4j_viz.pandas import from_dfs + + +def _data_type_name(type: DataType) -> str: + if isinstance(type, StringType): + return "VARCHAR" + elif isinstance(type, LongType): + return "BIGINT" + elif isinstance(type, IntegerType): + return "INT" + elif isinstance(type, DoubleType): + return "DOUBLE" + elif isinstance(type, DecimalType): + return "NUMBER" + elif isinstance(type, BooleanType): + return "BOOLEAN" + elif isinstance(type, ByteType): + return "TINYINT" + elif isinstance(type, DateType): + return "DATE" + elif isinstance(type, ShortType): + return "SMALLINT" + elif isinstance(type, FloatType): 
+ return "FLOAT" + elif isinstance(type, ArrayType): + return "ARRAY" + elif isinstance(type, VectorType): + return "VECTOR" + elif isinstance(type, MapType): + return "OBJECT" + elif isinstance(type, TimeType): + return "TIME" + elif isinstance(type, TimestampType): + return "TIMESTAMP" + elif isinstance(type, VariantType): + return "VARIANT" + elif isinstance(type, GeographyType): + return "GEOGRAPHY" + elif isinstance(type, GeometryType): + return "GEOMETRY" + else: + # This actually does the job much of the time anyway + return type.simple_string().upper() + + +SUPPORTED_ID_TYPES = [_data_type_name(data_type) for data_type in [StringType(), LongType(), IntegerType()]] + + +def _validate_id_column(schema: StructType, column_name: str, index: int, supported_types: list[str]) -> None: + if column_name.lower() not in [name.lower() for name in schema.names]: + raise ValueError(f"Schema must contain a `{column_name}` column") + + field: StructField = schema.fields[index] + + if field.name.lower() != column_name.lower(): + raise ValueError(f"Column `{column_name}` must have column index {index}") + + if _data_type_name(field.datatype) not in supported_types: + raise ValueError( + f"Column `{column_name}` has invalid type `{_data_type_name(field.datatype)}`. 
Expected one of [{', '.join(supported_types)}]" + ) + + +def _validate_viz_node_table(table: str, info: ValidationInfo) -> str: + context = info.context + if context and context["session"] is not None: + session = context["session"] + try: + schema = session.table(table).schema + _validate_id_column(schema, "nodeId", 0, SUPPORTED_ID_TYPES) + except SnowparkSQLException as e: + raise ValueError(f"Table '{table}' does not exist or is not accessible.") from e + return table + + +def _validate_viz_relationship_table( + table: str, + info: ValidationInfo, +) -> str: + context = info.context + if context and context["session"] is not None: + session = context["session"] + try: + schema = session.table(table).schema + _validate_id_column(schema, "sourceNodeId", 0, SUPPORTED_ID_TYPES) + _validate_id_column(schema, "targetNodeId", 1, SUPPORTED_ID_TYPES) + except SnowparkSQLException as e: + raise ValueError(f"Table '{table}' does not exist or is not accessible.") from e + return table + + +def _parse_identifier_groups(identifier: str) -> list[str]: + """ + Parses a table identifier into a list of individual identifier groups. + + This function handles identifiers that may include double-quoted segments + and ensures proper validation of the identifier's structure. It raises + errors for invalid formats, such as unbalanced quotes, invalid characters, + or improper use of dots. + + Args: + identifier (str): The input string identifier to parse. + + Returns: + list[str]: A list of parsed identifier groups. + + Raises: + ValueError: If the identifier contains: + - Empty double quotes. + - Consecutive dots outside of double quotes. + - Unbalanced double quotes. + - Invalid characters in unquoted segments. + - Improper placement of dots around double-quoted segments. 
+ """ + inside = False # Tracks whether the current character is inside double quotes + quoted_starts = [] # Stores the start indices of double-quoted segments + quoted_ends = [] # Stores the end indices of double-quoted segments + remaining = "" # Stores the unquoted part of the identifier + previous_is_dot = False # Tracks if the previous character was a dot + + for i, c in enumerate(identifier): + if c == '"': + if not inside: + quoted_starts.append(i + 1) # Mark the start of a quoted segment + previous_is_dot = False + else: + quoted_ends.append(i) # Mark the end of a quoted segment + if quoted_ends[-1] - quoted_starts[-1] == 0: + raise ValueError("Empty double quotes") + inside = not inside # Toggle the inside state + else: + if not inside: + remaining += c # Append unquoted characters to `remaining` + if c == ".": + if previous_is_dot: + raise ValueError("Not ok to have consecutive dots outside of double quote") + previous_is_dot = True + else: + previous_is_dot = False + + if len(quoted_starts) != len(quoted_ends): + raise ValueError("Unbalanced double quotes") + + for quoted_start in quoted_starts: + if quoted_start > 1: + if identifier[quoted_start - 2] != ".": + raise ValueError("Only dot character may precede before double quoted identifier") + + for quoted_end in quoted_ends: + if quoted_end < len(identifier) - 1: + if identifier[quoted_end + 1] != ".": + raise ValueError("Only dot character may follow double quoted identifier") + + words = remaining.split(".") # Split the unquoted part by dots + for word in words: + if len(word) == 0: + continue + if word.lower()[0] not in "abcdefghijklmnopqrstuvwxyz_": + raise ValueError(f"Invalid first character in identifier {word}. Only a-z, A-Z, and _ are allowed.") + if not set(word.lower()).issubset(set("abcdefghijklmnopqrstuvwxyz$_0123456789")): + raise ValueError(f"Invalid characters in identifier {word}. 
Only a-z, A-Z, 0-9, _, and $ are allowed.") + + empty_words_idx = [i for i, w in enumerate(words) if w == ""] + for i in range(len(quoted_starts)): + # Replace empty words with their corresponding quoted segments + words[empty_words_idx[i]] = f'"{identifier[quoted_starts[i] : quoted_ends[i]]}"' + + return words + + +def _validate_table_name(table: str) -> str: + if not isinstance(table, str): + raise TypeError(f"Table name must be a string, got {type(table).__name__}") + + try: + words = _parse_identifier_groups(table) + except ValueError as e: + raise ValueError(f"Invalid table name '{table}'. {str(e)}") from e + + if len(words) not in {1, 3}: + raise ValueError( + f"Invalid table name '{table}'. Table names must be in the format '..' or '
'" + ) + + return table + + +Table = Annotated[str, BeforeValidator(_validate_table_name)] + +VizNodeTable = Annotated[Table, AfterValidator(_validate_viz_node_table)] +VizRelationshipTable = Annotated[Table, AfterValidator(_validate_viz_relationship_table)] + + +class Orientation(Enum): + NATURAL = "natural" + UNDIRECTED = "undirected" + REVERSE = "reverse" + + +def _to_lower(value: str) -> str: + return value.lower() if value and isinstance(value, str) else value + + +LowercaseOrientation = Annotated[Orientation, BeforeValidator(_to_lower)] + + +class VizRelationshipTableConfig(BaseModel, extra="forbid"): + sourceTable: VizNodeTable + targetTable: VizNodeTable + orientation: Optional[LowercaseOrientation] = Orientation.NATURAL + + +class VizProjectConfig(BaseModel, extra="forbid"): + defaultTablePrefix: Optional[str] = None + nodeTables: list[VizNodeTable] + relationshipTables: dict[VizRelationshipTable, VizRelationshipTableConfig] + + +def _map_tables( + session: Session, project_model: VizProjectConfig +) -> tuple[list[DataFrame], list[DataFrame], list[str]]: + offset = 0 + to_internal = {} + node_dfs = [] + for table in project_model.nodeTables: + df = session.table(table).to_pandas() + internal_ids = range(offset, offset + df.shape[0]) + to_internal[table] = df[["NODEID"]].copy() + to_internal[table]["INTERNALID"] = internal_ids + offset += df.shape[0] + + df["SNOWFLAKEID"] = df["NODEID"] + df["NODEID"] = internal_ids + + node_dfs.append(df) + + rel_dfs = [] + rel_table_names = [] + for table, rel_table_config in project_model.relationshipTables.items(): + df = session.table(table).to_pandas() + + source_table = rel_table_config.sourceTable + target_table = rel_table_config.targetTable + + df = df.merge(to_internal[source_table], left_on="SOURCENODEID", right_on="NODEID") + df.drop(["SOURCENODEID", "NODEID"], axis=1, inplace=True) + df.rename({"INTERNALID": "SOURCENODEID"}, axis=1, inplace=True) + df = df.merge(to_internal[target_table], 
left_on="TARGETNODEID", right_on="NODEID") + df.drop(["TARGETNODEID", "NODEID"], axis=1, inplace=True) + df.rename({"INTERNALID": "TARGETNODEID"}, axis=1, inplace=True) + + if ( + rel_table_config.orientation == Orientation.NATURAL + or rel_table_config.orientation == Orientation.UNDIRECTED + ): + rel_dfs.append(df) + rel_table_names.append(table) + + if rel_table_config.orientation == Orientation.REVERSE: + df_rev = df.rename(columns={"SOURCENODEID": "TARGETNODEID", "TARGETNODEID": "SOURCENODEID"}, copy=False) + rel_dfs.append(df_rev) + rel_table_names.append(table) + + if rel_table_config.orientation == Orientation.UNDIRECTED: + df_rev = df.rename(columns={"SOURCENODEID": "TARGETNODEID", "TARGETNODEID": "SOURCENODEID"}, copy=True) + rel_dfs.append(df_rev) + rel_table_names.append(table) + + return node_dfs, rel_dfs, rel_table_names + + +def from_snowflake( + session: Session, + project_config: dict[str, Any], + node_radius_min_max: Optional[tuple[float, float]] = (3, 60), +) -> VisualizationGraph: + project_model = VizProjectConfig.model_validate(project_config, strict=False, context={"session": session}) + node_dfs, rel_dfs, rel_table_names = _map_tables(session, project_model) + + node_caption_present = False + for node_df in node_dfs: + if "CAPTION" in node_df.columns: + node_caption_present = True + break + + if not node_caption_present: + for i, node_df in enumerate(node_dfs): + node_df["caption"] = project_model.nodeTables[i].split(".")[-1] + + rel_caption_present = False + for rel_df in rel_dfs: + if "CAPTION" in rel_df.columns: + rel_caption_present = True + break + + if not rel_caption_present: + for i, rel_df in enumerate(rel_dfs): + rel_df["caption"] = rel_table_names[i].split(".")[-1] + + VG = from_dfs(node_dfs, rel_dfs, node_radius_min_max) + + VG.color_nodes(field="caption", color_space=ColorSpace.DISCRETE) + + return VG diff --git a/python-wrapper/src/neo4j_viz/visualization_graph.py b/python-wrapper/src/neo4j_viz/visualization_graph.py index 
33a188bc..c3387dd2 100644 --- a/python-wrapper/src/neo4j_viz/visualization_graph.py +++ b/python-wrapper/src/neo4j_viz/visualization_graph.py @@ -5,6 +5,7 @@ from typing import Any, Callable, Hashable, Optional, Union from IPython.display import HTML +from pydantic.alias_generators import to_snake from pydantic_extra_types.color import Color, ColorType from .colors import NEO4J_COLORS_CONTINUOUS, NEO4J_COLORS_DISCRETE, ColorSpace, ColorsType @@ -277,7 +278,7 @@ def node_to_attr(node: Node) -> Any: return node.properties.get(attribute) else: assert field is not None - attribute = field + attribute = to_snake(field) def node_to_attr(node: Node) -> Any: return getattr(node, attribute) diff --git a/python-wrapper/tests/test_snowflake.py b/python-wrapper/tests/test_snowflake.py new file mode 100644 index 00000000..b7bbecea --- /dev/null +++ b/python-wrapper/tests/test_snowflake.py @@ -0,0 +1,77 @@ +import pytest +from snowflake.snowpark import Session +from snowflake.snowpark.types import LongType, StructField, StructType + +from neo4j_viz.snowflake import from_snowflake + + +@pytest.fixture +def session() -> Session: + return Session.builder.configs({"local_testing": True}).create() # type: ignore[no-any-return] + + +@pytest.fixture +def session_with_minimal_graph(session: Session) -> Session: + """ + Create a minimal graph with two nodes and one relationship. 
+ """ + node_df = session.create_dataframe( + data=[ + [6], + [7], + ], + schema=StructType( + [ + StructField("NODEID", LongType()), + ] + ), + ) + node_df.write.save_as_table("NODES") + + rel_df = session.create_dataframe( + data=[ + [6, 7], + ], + schema=StructType( + [ + StructField("SOURCENODEID", LongType()), + StructField("TARGETNODEID", LongType()), + ] + ), + ) + rel_df.write.save_as_table("RELS") + + return session + + +def test_from_snowflake(session_with_minimal_graph: Session) -> None: + VG = from_snowflake( + session_with_minimal_graph, + { + "nodeTables": ["NODES"], + "relationshipTables": { + "RELS": { + "sourceTable": "NODES", + "targetTable": "NODES", + }, + }, + }, + ) + + assert len(VG.nodes) == 2 + + assert VG.nodes[0].id == 0 + assert VG.nodes[0].caption == "NODES" + assert VG.nodes[0].color is not None + assert VG.nodes[0].properties == {"SNOWFLAKEID": 6} + + assert VG.nodes[1].id == 1 + assert VG.nodes[1].caption == "NODES" + assert VG.nodes[1].color is not None + assert VG.nodes[1].properties == {"SNOWFLAKEID": 7} + + assert len(VG.relationships) == 1 + + assert VG.relationships[0].source == 0 + assert VG.relationships[0].target == 1 + assert VG.relationships[0].caption == "RELS"