From a62dc790a345170fec21691f9623495cff30ce3d Mon Sep 17 00:00:00 2001 From: fzowl Date: Sat, 18 Jan 2025 15:16:52 +0100 Subject: [PATCH 01/10] Using VoyageAI's python package directly, allowing more features than langchain --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64a7157619..54815a0a5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -426,6 +426,7 @@ This makes it impossible to write stable unit tests, for example, or to obtain r ## 0.16.14 ### Enhancements +- **Refactoring the VoyageAI integration** to use voyageai package directly, allowing extra features. ### Features From 30a1f9ce43d15708cccc8984faf0473780bd0137 Mon Sep 17 00:00:00 2001 From: fzowl Date: Tue, 28 Jan 2025 16:39:30 +0100 Subject: [PATCH 02/10] Using VoyageAI's python package directly, allowing more features than langchain --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54815a0a5c..57a659e155 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ ## 0.18.13 ### Enhancements +- **Refactoring the VoyageAI integration** to use voyageai package directly, allowing extra features. ### Features @@ -426,7 +427,6 @@ This makes it impossible to write stable unit tests, for example, or to obtain r ## 0.16.14 ### Enhancements -- **Refactoring the VoyageAI integration** to use voyageai package directly, allowing extra features. ### Features From 4478dc7f5834c6766aa2a2edf2dc1ceed22939f9 Mon Sep 17 00:00:00 2001 From: fzowl Date: Wed, 21 May 2025 13:28:07 +0200 Subject: [PATCH 03/10] Adding VoyageAI's v3.5 models config --- unstructured/embed/voyageai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unstructured/embed/voyageai.py b/unstructured/embed/voyageai.py index 35d231884d..9c6a69df5b 100644 --- a/unstructured/embed/voyageai.py +++ b/unstructured/embed/voyageai.py @@ -41,9 +41,9 @@ def get_batch_size(self): if self.batch_size is None: if self.model_name in ["voyage-2", "voyage-02"]: self.batch_size = DEFAULT_VOYAGE_2_BATCH_SIZE - elif self.model_name == "voyage-3-lite": + elif self.model_name in ["voyage-3.5-lite", "voyage-3-lite"]: self.batch_size = DEFAULT_VOYAGE_3_LITE_BATCH_SIZE - elif self.model_name == "voyage-3": + elif self.model_name in ["voyage-3.5", "voyage-3"]: self.batch_size = DEFAULT_VOYAGE_3_BATCH_SIZE else: self.batch_size = DEFAULT_BATCH_SIZE From 400ceeb418e74f42497b6037e87a55cc34ea01af Mon Sep 17 00:00:00 2001 From: fzowl Date: Wed, 21 May 2025 13:32:40 +0200 Subject: [PATCH 04/10] Adding VoyageAI's v3.5 models config --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57a659e155..15d0c6c7c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,7 @@ ## 0.18.13 ### Enhancements -- **Refactoring the VoyageAI integration** to use voyageai package directly, allowing extra features. +- **VoyageAI's v3.5 models** ### Features From e531d97c8ac98f0df10ea43b5aca7d631718c931 Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 16 Oct 2025 14:11:01 +0200 Subject: [PATCH 05/10] Supporting the contextual model Counting the tokens and batching --- test_unstructured/embed/test_voyageai.py | 217 +++++++++++++++++++++++ unstructured/embed/voyageai.py | 213 +++++++++++++++++----- 2 files changed, 381 insertions(+), 49 deletions(-) diff --git a/test_unstructured/embed/test_voyageai.py b/test_unstructured/embed/test_voyageai.py index f0a24bde7c..1273d9b283 100644 --- a/test_unstructured/embed/test_voyageai.py +++ b/test_unstructured/embed/test_voyageai.py @@ -10,6 +10,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): embed_response.embeddings = [[1], [2]] mock_client = mocker.MagicMock() mock_client.embed.return_value = embed_response + mock_client.tokenize.return_value = [[1], [1]] # Mock token counts # Mock get_client to return our mock_client mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) @@ -23,3 +24,219 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): assert len(elements) == 2 assert elements[0].to_dict()["text"] == "This is sentence 1" assert elements[1].to_dict()["text"] == "This is sentence 2" + + +def test_embed_documents_voyage_3_5(mocker): + """Test embedding with voyage-3.5 model.""" + embed_response = Mock() + embed_response.embeddings = [[1.0] * 1024, [2.0] * 1024] + mock_client = mocker.MagicMock() + mock_client.embed.return_value = embed_response + mock_client.tokenize.return_value = [[1, 2, 3], [1, 2]] # Mock token counts + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + ) + elements = encoder.embed_documents( + elements=[Text("Test document 1"), Text("Test document 2")], + ) + assert len(elements) == 2 + assert len(elements[0].embeddings) == 1024 + assert len(elements[1].embeddings) == 1024 + + +def test_embed_documents_voyage_3_5_lite(mocker): + """Test embedding with voyage-3.5-lite model.""" + embed_response = Mock() + embed_response.embeddings = [[1.0] * 512, [2.0] * 512, [3.0] * 512] + mock_client = mocker.MagicMock() + mock_client.embed.return_value = embed_response + mock_client.tokenize.return_value = [[1], [1], [1]] # Mock token counts + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5-lite") + ) + elements = encoder.embed_documents( + elements=[Text("Test 1"), Text("Test 2"), Text("Test 3")], + ) + assert len(elements) == 3 + assert all(len(e.embeddings) == 512 for e in elements) + + +def test_embed_documents_contextual_model(mocker): + """Test embedding with voyage-context-3 model.""" + # Mock contextualized_embed response + contextualized_response = Mock() + result_item = Mock() + result_item.embeddings = [[1.0] * 1024, [2.0] * 1024] + contextualized_response.results = [result_item] + + mock_client = mocker.MagicMock() + mock_client.contextualized_embed.return_value = contextualized_response + mock_client.tokenize.return_value = [[1, 2], [1, 2, 3]] # Mock token counts + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-context-3") + ) + elements = encoder.embed_documents( + elements=[Text("Context document 1"), Text("Context document 2")], + ) + assert len(elements) == 2 + assert len(elements[0].embeddings) == 1024 + assert len(elements[1].embeddings) == 1024 + # Verify contextualized_embed was called + mock_client.contextualized_embed.assert_called_once() + + +def test_count_tokens(mocker): + """Test token counting functionality.""" + mock_client = mocker.MagicMock() + mock_client.tokenize.return_value = [[1, 2], [1, 2, 3, 4, 5]] # Different token counts + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + ) + texts = ["short text", "this is a longer text with more tokens"] + token_counts = encoder.count_tokens(texts) + + assert len(token_counts) == 2 + assert token_counts[0] == 2 + assert token_counts[1] == 5 + + +def test_count_tokens_empty_list(mocker): + """Test token counting with empty list.""" + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mocker.MagicMock()) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + ) + token_counts = encoder.count_tokens([]) + assert token_counts == [] + + +def test_get_token_limit(mocker): + """Test getting token limit for different models.""" + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mocker.MagicMock()) + + # Test voyage-3.5 model + config = VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + assert config.get_token_limit() == 320_000 + + # Test voyage-3.5-lite model + config_lite = VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5-lite") + assert config_lite.get_token_limit() == 1_000_000 + + # Test context model + config_context = VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-context-3") + assert config_context.get_token_limit() == 32_000 + + # Test voyage-2 model + config_v2 = VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-2") + assert config_v2.get_token_limit() == 320_000 + + # Test unknown model (should use default) + config_unknown = VoyageAIEmbeddingConfig(api_key="api_key", model_name="unknown-model") + assert config_unknown.get_token_limit() == 120_000 + + +def test_is_context_model(mocker): + """Test the _is_context_model helper method.""" + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mocker.MagicMock()) + + # Test with context model + encoder_context = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-context-3") + ) + assert encoder_context._is_context_model() is True + + # Test with regular model + encoder_regular = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + ) + assert encoder_regular._is_context_model() is False + + +def test_build_batches_with_token_limits(mocker): + """Test that batching respects token limits.""" + mock_client = mocker.MagicMock() + # Simulate different token counts for each text + mock_client.tokenize.return_value = [[1] * 10, [1] * 20, [1] * 15, [1] * 25] + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-2") + ) + texts = ["text1", "text2", "text3", "text4"] + batches = list(encoder._build_batches(texts, mock_client)) + + # Should create at least one batch + assert len(batches) >= 1 + # Total texts should be preserved + total_texts = sum(len(batch) for batch in batches) + assert total_texts == len(texts) + + +def test_embed_query(mocker): + """Test embedding a single query.""" + embed_response = Mock() + embed_response.embeddings = [[1.0] * 1024] + mock_client = mocker.MagicMock() + mock_client.embed.return_value = embed_response + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + ) + embedding = encoder.embed_query("test query") + + assert len(embedding) == 1024 + # Verify embed was called with input_type="query" + mock_client.embed.assert_called_once() + call_kwargs = mock_client.embed.call_args[1] + assert call_kwargs["input_type"] == "query" + + +def test_embed_documents_with_output_dimension(mocker): + """Test embedding with custom output dimension.""" + embed_response = Mock() + embed_response.embeddings = [[1.0] * 512, [2.0] * 512] + mock_client = mocker.MagicMock() + mock_client.embed.return_value = embed_response + mock_client.tokenize.return_value = [[1], [1]] + + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig( + api_key="api_key", model_name="voyage-3.5", output_dimension=512 + ) + ) + elements = encoder.embed_documents( + elements=[Text("Test 1"), Text("Test 2")], + ) + assert len(elements) == 2 + # Verify output_dimension was passed + call_kwargs = mock_client.embed.call_args[1] + assert call_kwargs["output_dimension"] == 512 + + +def test_embed_documents_empty_list(mocker): + """Test embedding empty list of documents.""" + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mocker.MagicMock()) + + encoder = VoyageAIEmbeddingEncoder( + config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-3.5") + ) + elements = encoder.embed_documents(elements=[]) + assert elements == [] diff --git a/unstructured/embed/voyageai.py b/unstructured/embed/voyageai.py index 9c6a69df5b..8a496a68a3 100644 --- a/unstructured/embed/voyageai.py +++ b/unstructured/embed/voyageai.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import TYPE_CHECKING, Iterable, List, Optional, cast +from typing import TYPE_CHECKING, Iterable, List, Optional import numpy as np from pydantic import Field, SecretStr @@ -11,10 +11,29 @@ if TYPE_CHECKING: from voyageai import Client -DEFAULT_VOYAGE_2_BATCH_SIZE = 72 -DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30 -DEFAULT_VOYAGE_3_BATCH_SIZE = 10 -DEFAULT_BATCH_SIZE = 7 +# Token limits for different VoyageAI models +VOYAGE_TOTAL_TOKEN_LIMITS = { + "voyage-context-3": 32_000, + "voyage-3.5-lite": 1_000_000, + "voyage-3.5": 320_000, + "voyage-2": 320_000, + "voyage-02": 320_000, + "voyage-3-large": 120_000, + "voyage-code-3": 120_000, + "voyage-large-2-instruct": 120_000, + "voyage-finance-2": 120_000, + "voyage-multilingual-2": 120_000, + "voyage-law-2": 120_000, + "voyage-large-2": 120_000, + "voyage-3": 120_000, + "voyage-3-lite": 120_000, + "voyage-code-2": 120_000, + "voyage-3-m-exp": 120_000, + "voyage-multimodal-3": 120_000, +} + +# Batch size for embedding requests (max documents per batch) +MAX_BATCH_SIZE = 1000 class VoyageAIEmbeddingConfig(EmbeddingConfig): @@ -37,17 +56,9 @@ def get_client(self) -> "Client": api_key=self.api_key.get_secret_value(), ) - def get_batch_size(self): - if self.batch_size is None: - if self.model_name in ["voyage-2", "voyage-02"]: - self.batch_size = DEFAULT_VOYAGE_2_BATCH_SIZE - elif self.model_name in ["voyage-3.5-lite", "voyage-3-lite"]: - self.batch_size = DEFAULT_VOYAGE_3_LITE_BATCH_SIZE - elif self.model_name in ["voyage-3.5", "voyage-3"]: - self.batch_size = DEFAULT_VOYAGE_3_BATCH_SIZE - else: - self.batch_size = DEFAULT_BATCH_SIZE - return self.batch_size + def get_token_limit(self) -> int: + """Get the token limit for the current model.""" + return VOYAGE_TOTAL_TOKEN_LIMITS.get(self.model_name, 120_000) @dataclass @@ -70,53 +81,157 @@ def is_unit_vector(self) -> bool: exemplary_embedding = self.get_exemplary_embedding() return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) - def embed_documents(self, elements: List[Element]) -> List[Element]: - client = self.config.get_client() - embeddings: List[List[float]] = [] - - _iter = self._get_batch_iterator(elements) - for i in _iter: - r = client.embed( - texts=[str(e) for e in elements[i : i + self.config.get_batch_size()]], + def _is_context_model(self) -> bool: + """Check if the model is a contextualized embedding model.""" + return "context" in self.config.model_name + + def _build_batches(self, texts: List[str], client: "Client") -> Iterable[List[str]]: + """ + Generate batches of texts based on token limits. + + Args: + texts: List of texts to batch. + client: VoyageAI client instance to use for tokenization. + + Yields: + Batches of texts as lists. + """ + if not texts: + return + + max_tokens_per_batch = self.config.get_token_limit() + current_batch: List[str] = [] + current_batch_tokens = 0 + + # Tokenize all texts in one API call + all_token_lists = client.tokenize(texts, model=self.config.model_name) + token_counts = [len(tokens) for tokens in all_token_lists] + + for i, text in enumerate(texts): + n_tokens = token_counts[i] + + # Check if adding this text would exceed limits + if current_batch and ( + len(current_batch) >= MAX_BATCH_SIZE + or (current_batch_tokens + n_tokens > max_tokens_per_batch) + ): + # Yield the current batch and start a new one + yield current_batch + current_batch = [] + current_batch_tokens = 0 + + current_batch.append(text) + current_batch_tokens += n_tokens + + # Yield the last batch (always has at least one text) + if current_batch: + yield current_batch + + def _embed_batch( + self, batch: List[str], client: "Client", input_type: str = "document" + ) -> List[List[float]]: + """ + Embed a batch of texts using the appropriate method for the model. + + Args: + batch: List of texts to embed. + client: VoyageAI client instance to use for embedding. + input_type: Type of input ("document" or "query"). + + Returns: + List of embedding vectors. + """ + if self._is_context_model(): + result = client.contextualized_embed( + inputs=[batch], + model=self.config.model_name, + input_type=input_type, + output_dimension=self.config.output_dimension, + ) + return [list(emb) for emb in result.results[0].embeddings] + else: + result = client.embed( + texts=batch, model=self.config.model_name, - input_type="document", + input_type=input_type, truncation=self.config.truncation, output_dimension=self.config.output_dimension, - ).embeddings - embeddings.extend(cast(Iterable[List[float]], r)) - return self._add_embeddings_to_elements(elements, embeddings) + ) + return [list(emb) for emb in result.embeddings] + + def embed_documents(self, elements: List[Element]) -> List[Element]: + """ + Embed documents with automatic batching based on token limits. + + Args: + elements: List of elements to embed. + + Returns: + List of elements with embeddings added. + """ + if not elements: + return [] - def embed_query(self, query: str) -> List[float]: client = self.config.get_client() - return client.embed( - texts=[query], - model=self.config.model_name, - input_type="query", - truncation=self.config.truncation, - output_dimension=self.config.output_dimension, - ).embeddings[0] + texts = [str(e) for e in elements] + all_embeddings: List[List[float]] = [] - @staticmethod - def _add_embeddings_to_elements(elements, embeddings) -> List[Element]: - assert len(elements) == len(embeddings) - elements_w_embedding = [] - for i, element in enumerate(elements): - element.embeddings = embeddings[i] - elements_w_embedding.append(element) - return elements + # Process each batch + batches = list(self._build_batches(texts, client)) - def _get_batch_iterator(self, elements: List[Element]) -> Iterable: if self.config.show_progress_bar: try: from tqdm.auto import tqdm # type: ignore + + batches = tqdm(batches, desc="Embedding batches") except ImportError as e: raise ImportError( "Must have tqdm installed if `show_progress_bar` is set to True. " "Please install with `pip install tqdm`." ) from e - _iter = tqdm(range(0, len(elements), self.config.get_batch_size())) - else: - _iter = range(0, len(elements), self.config.get_batch_size()) # type: ignore + for batch in batches: + batch_embeddings = self._embed_batch(batch, client, input_type="document") + all_embeddings.extend(batch_embeddings) + + return self._add_embeddings_to_elements(elements, all_embeddings) + + def embed_query(self, query: str) -> List[float]: + """ + Embed a single query string. + + Args: + query: Query string to embed. + + Returns: + Embedding vector. + """ + client = self.config.get_client() + batch_embeddings = self._embed_batch([query], client, input_type="query") + return batch_embeddings[0] + + def count_tokens(self, texts: List[str]) -> List[int]: + """ + Count tokens for the given texts. + + Args: + texts: List of texts to count tokens for. + + Returns: + List of token counts for each text. + """ + if not texts: + return [] - return _iter + client = self.config.get_client() + token_lists = client.tokenize(texts, model=self.config.model_name) + return [len(token_list) for token_list in token_lists] + + @staticmethod + def _add_embeddings_to_elements(elements, embeddings) -> List[Element]: + assert len(elements) == len(embeddings) + elements_w_embedding = [] + for i, element in enumerate(elements): + element.embeddings = embeddings[i] + elements_w_embedding.append(element) + return elements From 30f5c4a2aa20351667d48781e0b3164f571301a8 Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 16 Oct 2025 14:12:29 +0200 Subject: [PATCH 06/10] Supporting the contextual model Counting the tokens and batching --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d0c6c7c1..57a659e155 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,7 @@ ## 0.18.13 ### Enhancements -- **VoyageAI's v3.5 models** +- **Refactoring the VoyageAI integration** to use voyageai package directly, allowing extra features. ### Features From e24da54e71f4d0ba53753ee4b1c6d2c806061750 Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 16 Oct 2025 14:14:38 +0200 Subject: [PATCH 07/10] Supporting the contextual model Counting the tokens and batching --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57a659e155..15d0c6c7c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,7 @@ ## 0.18.13 ### Enhancements -- **Refactoring the VoyageAI integration** to use voyageai package directly, allowing extra features. +- **VoyageAI's v3.5 models** ### Features From f3f8a71f2b01b7249f3559ef8777e74c6df9fd27 Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 16 Oct 2025 14:15:40 +0200 Subject: [PATCH 08/10] Supporting the contextual model Counting the tokens and batching --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d0c6c7c1..64a7157619 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,6 @@ ## 0.18.13 ### Enhancements -- **VoyageAI's v3.5 models** ### Features From 50b7d53c15ac7d262555be91e508bbb614927b9f Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 16 Oct 2025 14:22:39 +0200 Subject: [PATCH 09/10] Supporting the contextual model Counting the tokens and batching --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64a7157619..f694b47014 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Enhancement - Speed up function _assign_hash_ids by 34% (codeflash) +- Improve the VoyageAI integration +- Add voyage-context-3 support ### Features From 17348488f18ba9dbbbf9728e90b224c32ab16777 Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 16 Oct 2025 14:27:23 +0200 Subject: [PATCH 10/10] Supporting the contextual model Counting the tokens and batching --- CHANGELOG.md | 12 ++++++++++-- unstructured/__version__.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f694b47014..c7e66363db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,20 @@ -## 0.18.16 +## 0.18.17 ### Enhancement -- Speed up function _assign_hash_ids by 34% (codeflash) - Improve the VoyageAI integration - Add voyage-context-3 support ### Features +### Fixes + +## 0.18.16 + +### Enhancement +- Speed up function _assign_hash_ids by 34% (codeflash) + +### Features + ### Fixes - Bumped dependencies via pip-compile to address the following CVEs: - **authlib**: GHSA-pq5p-34cr-23v9 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index c71f847e71..76895ff981 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.18.16" # pragma: no cover +__version__ = "0.18.17" # pragma: no cover