From de7d534f5daff13e6469394b829ec553446ac310 Mon Sep 17 00:00:00 2001 From: Gardenia-zx <3469959715@qq.com> Date: Sun, 3 May 2026 13:08:13 +0800 Subject: [PATCH 1/2] fix: handle empty gremlin examples when building index --- .../index_op/build_gremlin_example_index.py | 18 +++++++--- .../test_build_gremlin_example_index.py | 34 +++++++++++++++---- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py index fb385a59c..4c5378fc1 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py @@ -38,14 +38,22 @@ def __init__( self.vector_index = vector_index def run(self, context: Dict[str, Any]) -> Dict[str, Any]: - # !: We have assumed that self.example is not empty + if not self.examples: + context["embed_dim"] = 0 + return context + queries = [example["query"] for example in self.examples] # TODO: refactor function chain async to avoid blocking examples_embedding = asyncio.run(get_embeddings_parallel(self.embedding, queries)) + + if not examples_embedding: + context["embed_dim"] = 0 + return context + embed_dim = len(examples_embedding[0]) - if len(self.examples) > 0: - vector_index = self.vector_index.from_name(embed_dim, self.vector_index_name) - vector_index.add(examples_embedding, self.examples) - vector_index.save_index_by_name(self.vector_index_name) + vector_index = self.vector_index.from_name(embed_dim, self.vector_index_name) + vector_index.add(examples_embedding, self.examples) + vector_index.save_index_by_name(self.vector_index_name) + context["embed_dim"] = embed_dim return context diff --git a/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py b/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py index 239e377ae..41e6b1475 100644 --- a/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py +++ b/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py @@ -54,7 +54,10 @@ def test_init(self): self.assertEqual(self.index_builder.vector_index_name, "gremlin_examples") @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_with_examples(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test run method with examples""" # Setup mocks @@ -78,7 +81,10 @@ def test_run_with_examples(self, mock_get_embeddings_parallel, mock_asyncio_run) self.assertEqual(context["embed_dim"], 3) @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_with_empty_examples(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test run method with empty examples""" # Create new mocks for this test @@ -98,12 +104,23 @@ def test_run_with_empty_examples(self, mock_get_embeddings_parallel, mock_asynci # Run the method context = {} - # This should raise an IndexError when trying to access examples_embedding[0] - with self.assertRaises(IndexError): - empty_index_builder.run(context) + # Empty examples should be handled gracefully without building vector index. + result = empty_index_builder.run(context) + + mock_asyncio_run.assert_not_called() + mock_get_embeddings_parallel.assert_not_called() + mock_vector_store_class.from_name.assert_not_called() + mock_vector_store_instance.add.assert_not_called() + mock_vector_store_instance.save_index_by_name.assert_not_called() + + self.assertEqual(result["embed_dim"], 0) + self.assertEqual(context["embed_dim"], 0) @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_single_example(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test run method with single example""" # Create new mocks for this test @@ -134,7 +151,10 @@ def test_run_single_example(self, mock_get_embeddings_parallel, mock_asyncio_run self.assertEqual(result["embed_dim"], 4) @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_preserves_existing_context(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test that run method preserves existing context data""" # Setup mocks From 6cfa3ab8b79a31a40bafff301c02580fc8f58ba9 Mon Sep 17 00:00:00 2001 From: Gardenia-zx <3469959715@qq.com> Date: Wed, 6 May 2026 20:52:48 +0800 Subject: [PATCH 2/2] fix: address empty gremlin examples review comments --- .../nodes/index_node/build_gremlin_example_index.py | 2 +- .../operators/index_op/build_gremlin_example_index.py | 4 +++- .../operators/index_op/test_build_gremlin_example_index.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py b/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py index 697a88ca7..4dc915406 100644 --- a/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py +++ b/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py @@ -34,7 +34,7 @@ def node_init(self): # pylint: disable=import-outside-toplevel from hugegraph_llm.utils.vector_index_utils import get_vector_index_class - if not self.wk_input.examples: + if self.wk_input.examples is None: return CStatus(-1, "examples is required in BuildGremlinExampleIndexNode") examples = self.wk_input.examples vector_index = get_vector_index_class(index_settings.cur_vector_index) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py index 4c5378fc1..15aea5d1f 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py @@ -39,6 +39,7 @@ def __init__( def run(self, context: Dict[str, Any]) -> Dict[str, Any]: if not self.examples: + self.vector_index.clean(self.vector_index_name) context["embed_dim"] = 0 return context @@ -47,6 +48,7 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: examples_embedding = asyncio.run(get_embeddings_parallel(self.embedding, queries)) if not examples_embedding: + self.vector_index.clean(self.vector_index_name) context["embed_dim"] = 0 return context @@ -56,4 +58,4 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: vector_index.save_index_by_name(self.vector_index_name) context["embed_dim"] = embed_dim - return context + return context \ No newline at end of file diff --git a/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py b/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py index 41e6b1475..fa0b37252 100644 --- a/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py +++ b/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py @@ -109,6 +109,7 @@ def test_run_with_empty_examples(self, mock_get_embeddings_parallel, mock_asynci mock_asyncio_run.assert_not_called() mock_get_embeddings_parallel.assert_not_called() + mock_vector_store_class.clean.assert_called_once_with("gremlin_examples") mock_vector_store_class.from_name.assert_not_called() mock_vector_store_instance.add.assert_not_called() mock_vector_store_instance.save_index_by_name.assert_not_called()