diff --git a/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py b/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py index 697a88ca7..4dc915406 100644 --- a/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py +++ b/hugegraph-llm/src/hugegraph_llm/nodes/index_node/build_gremlin_example_index.py @@ -34,7 +34,7 @@ def node_init(self): # pylint: disable=import-outside-toplevel from hugegraph_llm.utils.vector_index_utils import get_vector_index_class - if not self.wk_input.examples: + if self.wk_input.examples is None: return CStatus(-1, "examples is required in BuildGremlinExampleIndexNode") examples = self.wk_input.examples vector_index = get_vector_index_class(index_settings.cur_vector_index) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py index fb385a59c..15aea5d1f 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/build_gremlin_example_index.py @@ -38,14 +38,24 @@ def __init__( self.vector_index = vector_index def run(self, context: Dict[str, Any]) -> Dict[str, Any]: - # !: We have assumed that self.example is not empty + if not self.examples: + self.vector_index.clean(self.vector_index_name) + context["embed_dim"] = 0 + return context + queries = [example["query"] for example in self.examples] # TODO: refactor function chain async to avoid blocking examples_embedding = asyncio.run(get_embeddings_parallel(self.embedding, queries)) + + if not examples_embedding: + self.vector_index.clean(self.vector_index_name) + context["embed_dim"] = 0 + return context + embed_dim = len(examples_embedding[0]) - if len(self.examples) > 0: - vector_index = self.vector_index.from_name(embed_dim, self.vector_index_name) - vector_index.add(examples_embedding, self.examples) - vector_index.save_index_by_name(self.vector_index_name) + vector_index = self.vector_index.from_name(embed_dim, self.vector_index_name) + vector_index.add(examples_embedding, self.examples) + vector_index.save_index_by_name(self.vector_index_name) + context["embed_dim"] = embed_dim - return context + return context \ No newline at end of file diff --git a/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py b/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py index 239e377ae..fa0b37252 100644 --- a/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py +++ b/hugegraph-llm/src/tests/operators/index_op/test_build_gremlin_example_index.py @@ -54,7 +54,10 @@ def test_init(self): self.assertEqual(self.index_builder.vector_index_name, "gremlin_examples") @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_with_examples(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test run method with examples""" # Setup mocks @@ -78,7 +81,10 @@ def test_run_with_examples(self, mock_get_embeddings_parallel, mock_asyncio_run) self.assertEqual(context["embed_dim"], 3) @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_with_empty_examples(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test run method with empty examples""" # Create new mocks for this test @@ -98,12 +104,24 @@ def test_run_with_empty_examples(self, mock_get_embeddings_parallel, mock_asynci # Run the method context = {} - # This should raise an IndexError when trying to access examples_embedding[0] - with self.assertRaises(IndexError): - empty_index_builder.run(context) + # Empty examples should be handled gracefully without building vector index. + result = empty_index_builder.run(context) + + mock_asyncio_run.assert_not_called() + mock_get_embeddings_parallel.assert_not_called() + mock_vector_store_class.clean.assert_called_once_with("gremlin_examples") + mock_vector_store_class.from_name.assert_not_called() + mock_vector_store_instance.add.assert_not_called() + mock_vector_store_instance.save_index_by_name.assert_not_called() + + self.assertEqual(result["embed_dim"], 0) + self.assertEqual(context["embed_dim"], 0) @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_single_example(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test run method with single example""" # Create new mocks for this test @@ -134,7 +152,10 @@ def test_run_single_example(self, mock_get_embeddings_parallel, mock_asyncio_run self.assertEqual(result["embed_dim"], 4) @patch('asyncio.run') - @patch('hugegraph_llm.utils.embedding_utils.get_embeddings_parallel') + @patch( + 'hugegraph_llm.operators.index_op.build_gremlin_example_index.get_embeddings_parallel', + new_callable=MagicMock, + ) def test_run_preserves_existing_context(self, mock_get_embeddings_parallel, mock_asyncio_run): """Test that run method preserves existing context data""" # Setup mocks