From a7c7dbf51d87ab05e29cb128de97105f1db49ea2 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Mon, 2 Mar 2026 17:42:45 -0800 Subject: [PATCH 1/2] UI/API: S3 Vectors destination connector --- .../workflow/destinations/overview.mdx | 1 + .../workflow/destinations/s3-vectors.mdx | 59 ++++++++++++ docs.json | 2 + .../s3-vectors-platform.mdx | 10 ++ snippets/general-shared-text/s3-vectors.mdx | 92 +++++++++++++++++++ ui/connectors.mdx | 1 + ui/destinations/overview.mdx | 1 + ui/destinations/s3-vectors.mdx | 42 +++++++++ ui/destinations/s3.mdx | 8 ++ ui/sources/s3.mdx | 8 ++ 10 files changed, 224 insertions(+) create mode 100644 api-reference/workflow/destinations/s3-vectors.mdx create mode 100644 snippets/general-shared-text/s3-vectors-platform.mdx create mode 100644 snippets/general-shared-text/s3-vectors.mdx create mode 100644 ui/destinations/s3-vectors.mdx diff --git a/api-reference/workflow/destinations/overview.mdx b/api-reference/workflow/destinations/overview.mdx index 5f8679a3..a54c67e1 100644 --- a/api-reference/workflow/destinations/overview.mdx +++ b/api-reference/workflow/destinations/overview.mdx @@ -46,6 +46,7 @@ For the list of specific settings, see: - [Redis](/api-reference/workflow/destinations/redis) (`REDIS` for the Python SDK or `redis` for `curl` or Postman) - [Snowflake](/api-reference/workflow/destinations/snowflake) (`SNOWFLAKE` for the Python SDK or `snowflake` for `curl` or Postman) - [S3](/api-reference/workflow/destinations/s3) (`S3` for the Python SDK or `s3` for `curl` or Postman) +- [S3 Vectors](/api-reference/workflow/destinations/s3-vectors) (`S3_VECTORS` for the Python SDK or `s3_vectors` for `curl` or Postman) - [Teradata](/api-reference/workflow/destinations/teradata-sql) (`TERADATA` for the Python SDK or `teradata` for `curl` or Postman) - [Weaviate](/api-reference/workflow/destinations/weaviate) (`WEAVIATE` for the Python SDK or `weaviate` for `curl` or Postman) diff --git a/api-reference/workflow/destinations/s3-vectors.mdx 
b/api-reference/workflow/destinations/s3-vectors.mdx new file mode 100644 index 00000000..5043aa23 --- /dev/null +++ b/api-reference/workflow/destinations/s3-vectors.mdx @@ -0,0 +1,59 @@ +--- +title: S3 Vectors +--- + + + This article covers connecting Unstructured to Amazon S3 Vectors. + + For information about connecting Unstructured to Amazon S3 without support for Amazon S3 Vectors instead, see + [S3](/api-reference/workflow/destinations/s3). + + +import FirstTimeAPIDestinationConnector from '/snippets/general-shared-text/first-time-api-destination-connector.mdx'; + + + +Send processed data from Unstructured to Amazon S3 Vectors. + +The requirements are as follows. + +import s3VectorsPrerequisites from '/snippets/general-shared-text/s3-vectors.mdx'; + + + +## Add an access policy to an existing S3 Vectors bucket + +import S3VectorsBucketPolicy from '/snippets/general-shared-text/s3-vectors-bucket-policy.mdx'; + + + +## Create an S3 Vectors bucket with AWS CloudFormation + +import S3VectorsBucketCloudFormation from '/snippets/general-shared-text/s3-vectors-cf-setup.mdx'; + + + +## Create an S3 Vectors bucket with the AWS CLI + +import S3VectorsBucketCLI from '/snippets/general-shared-text/s3-vectors-cli-setup.mdx'; + + + +## Create the destination connector + +To create an S3 Vectorsdestination connector, see the following examples. 
+ +import s3VectorsSDK from '/snippets/destination_connectors/s3-vectors_sdk.mdx'; +import s3VectorsAPIRESTCreate from '/snippets/destination_connectors/s3-vectors_rest_create.mdx'; + + + + + + +Replace the preceding placeholders as follows: + +import s3VectorsAPIPlaceholders from '/snippets/general-shared-text/s3-vectors-api-placeholders.mdx'; + + + diff --git a/docs.json b/docs.json index 3f7b44e0..d622bc05 100644 --- a/docs.json +++ b/docs.json @@ -94,6 +94,7 @@ "ui/destinations/qdrant", "ui/destinations/redis", "ui/destinations/s3", + "ui/destinations/s3-vectors", "ui/destinations/snowflake", "ui/destinations/teradata-sql", "ui/destinations/weaviate" @@ -211,6 +212,7 @@ "api-reference/workflow/destinations/qdrant", "api-reference/workflow/destinations/redis", "api-reference/workflow/destinations/s3", + "api-reference/workflow/destinations/s3-vectors", "api-reference/workflow/destinations/snowflake", "api-reference/workflow/destinations/teradata-sql", "api-reference/workflow/destinations/weaviate" diff --git a/snippets/general-shared-text/s3-vectors-platform.mdx b/snippets/general-shared-text/s3-vectors-platform.mdx new file mode 100644 index 00000000..b89fe79b --- /dev/null +++ b/snippets/general-shared-text/s3-vectors-platform.mdx @@ -0,0 +1,10 @@ +Fill in the following fields: + +- **Name** (_required_): A unique name for this connector. +- **Region** (_required_): The AWS Region (such as `us-east-1`) of the target Amazon S3 Vectors bucket. +- **Key** (_required_): The AWS access key ID for the target AWS IAM principal that has the appropriate access to the target bucket. +- **Secret** (_required_): The AWS secret access key for the corresponding AWS access key ID. +- **Vector Bucket Name** (_required_): The name of the target bucket. +- **Index Name** (_required_): The name of the target index in the bucket. += **Batch Size**: The maximum number of vectors to generate a single batch. The maximum is `500`. The default is `100` if not otherwise specified. 
+- **Key Prefix**: A string prepend to each vector key. The default is to not prepend a string to each vector key, if this value is not otherwise specified. \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors.mdx b/snippets/general-shared-text/s3-vectors.mdx new file mode 100644 index 00000000..4adec991 --- /dev/null +++ b/snippets/general-shared-text/s3-vectors.mdx @@ -0,0 +1,92 @@ +- An Amazon S3 Vectors bucket. + + - Learn how to [create an S3 Vectors bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-buckets-create.html). + - Learn how to [get the name of an existing S3 Vectors bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-buckets-list.html). + +- The AWS Region (such as `us-east-1`) of the target S3 Vectors bucket. Learn how to [get the Region of an existing S3 Vectors bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-buckets-details.html). +- An index for the target S3 Vectors bucket. + + - Learn how to [create an index](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-create-index.html). + - Learn how to [get the name of an existing index](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-index-list.html). + + When creating an index, be sure to specify these settings: + + - **Vector index name** can be any allowed name pattern. + - For **Dimension**, only specify a number that is supported by Unstructured's available embedding models. + - For **Distance metric**, only specify **Cosine**. 
+ - For **Metadata configuration** under **Additional settings**, Unstructured recommends that you specify the following 10 keys for **Non-filterable metadata**: + + - `text` + - `link_urls` + - `link_texts` + - `coordinates-points` + - `coordinates-system` + - `data_source-url` + - `data_source-record_locator` + - `data_source-date_created` + - `data_source-date_modified` + - `data_source-date_processed` + + - There are no Unstructured-specific requirements for **Encryption** or **Tags**. + + [Learn more about these index settings](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-create-index.html). + +- For the target index, the number of dimensions that are generated. + Learn how to [get the index's number of dimensions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-index-list.html). + +- The AWS access key ID and the AWS secret access key for the target AWS IAM principal (such as an IAM user or group) that has the appropriate access to the S3 Vectors bucket. + + - If you use identity-based policies to control access, the target IAM principal must have at minimum the following access permissions. Replace the following placeholders: + + - Replace `` with the AWS Region short ID of the target S3 Vectors bucket. + - Replace `` with the AWS account ID of the target S3 Vectors bucket. + - Replace `` with the name of the target S3 Vectors bucket. + - Replace `` with the name of the target index. 
+ + ```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AccountBucketListing", + "Effect": "Allow", + "Action": [ + "s3vectors:ListVectorBuckets" + ], + "Resource": "*" + }, + { + "Sid": "AllowBucketAccess", + "Effect": "Allow", + "Action": [ + "s3vectors:GetVectorBucket", + "s3vectors:ListIndexes" + ], + "Resource": "arn:aws:s3vectors:::bucket/" + }, + { + "Sid": "AllowIndexAccess", + "Effect": "Allow", + "Action": [ + "s3vectors:ListIndexes", + "s3vectors:GetIndex", + "s3vectors:ListVectors", + "s3vectors:QueryVectors", + "s3vectors:PutVectors", + "s3vectors:GetVectors", + "s3vectors:DeleteVectors" + ], + "Resource": "arn:aws:s3vectors:::bucket//index/" + } + ] + } + ``` + + [Learn more about these S3 Vectors access permissions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-permissions.html). + + - Learn how to attach an access policy to an IAM [user](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_change-permissions.html#users_change_permissions-add-console), + [group](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_groups_manage_attach-policy.html), + or [role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user.html). + - Learn how to [create and manage AWS access key IDs and their related AWS secret access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). + - Learn how to [switch from an IAM user to a role for temporary access](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_manage-assume.html). 
+ diff --git a/ui/connectors.mdx b/ui/connectors.mdx index b2df9c27..dfa7ab4a 100644 --- a/ui/connectors.mdx +++ b/ui/connectors.mdx @@ -64,6 +64,7 @@ If your source is not listed here, you might still be able to connect Unstructur - [Qdrant](/ui/destinations/qdrant) - [Redis](/ui/destinations/redis) - [S3](/ui/destinations/s3) +- [S3 Vectors](/ui/destinations/s3-vectors) - [Snowflake](/ui/destinations/snowflake) - [Teradata](/ui/destinations/teradata-sql) - [Weaviate](/ui/destinations/weaviate) diff --git a/ui/destinations/overview.mdx b/ui/destinations/overview.mdx index ac6cf894..07814ce9 100644 --- a/ui/destinations/overview.mdx +++ b/ui/destinations/overview.mdx @@ -51,6 +51,7 @@ To create a destination connector: - [Qdrant](/ui/destinations/qdrant) - [Redis](/ui/destinations/redis) - [S3](/ui/destinations/s3) + - [S3 Vectors](/ui/destinations/s3-vectors) - [Snowflake](/ui/destinations/snowflake) - [Teradata](/ui/destinations/teradata-sql) - [Weaviate](/ui/destinations/weaviate) diff --git a/ui/destinations/s3-vectors.mdx b/ui/destinations/s3-vectors.mdx new file mode 100644 index 00000000..86cbeb49 --- /dev/null +++ b/ui/destinations/s3-vectors.mdx @@ -0,0 +1,42 @@ +--- +title: S3 Vectors +--- + + + This article covers connecting Unstructured to Amazon S3 Vectors. + + For information about connecting Unstructured to Amazon S3 without support for Amazon S3 Vectors instead, see + [S3](/ui/destinations/s3). + + +import FirstTimeUIDestinationConnector from '/snippets/general-shared-text/first-time-ui-destination-connector.mdx'; + + + +Send processed data from Unstructured to Amazon S3 Vectors. + +The requirements are as follows. + +import S3VectorsPrerequisites from '/snippets/general-shared-text/s3-vectors.mdx'; + + + +## Create the destination connector + +To create the destination connector: + +1. On the sidebar, click **Connectors**. +2. Click **Destinations**. +3. Click **New** or **Create Connector**. +4. Give the connector some unique **Name**. +5. 
In the **Provider** area, click **Amazon S3 Vectors**. +6. Click **Continue**. +7. Follow the on-screen instructions to fill in the fields as described later on this page. +8. Click **Save and Test**. + +import S3VectorsFields from '/snippets/general-shared-text/s3-vectors-platform.mdx'; + + + + + diff --git a/ui/destinations/s3.mdx b/ui/destinations/s3.mdx index 13f1fc08..77f1966e 100644 --- a/ui/destinations/s3.mdx +++ b/ui/destinations/s3.mdx @@ -2,6 +2,14 @@ title: S3 --- + + This article covers connecting Unstructured to Amazon S3 without support for Amazon S3 + Vectors. + + For information about connecting Unstructured to Amazon S3 Vectors instead, see + [S3 Vectors](/ui/destinations/s3-vectors). + + import FirstTimeUIDestinationConnector from '/snippets/general-shared-text/first-time-ui-destination-connector.mdx'; diff --git a/ui/sources/s3.mdx b/ui/sources/s3.mdx index c375872d..66cc5325 100644 --- a/ui/sources/s3.mdx +++ b/ui/sources/s3.mdx @@ -2,6 +2,14 @@ title: S3 --- + + This article covers connecting Unstructured to Amazon S3 as a source without support for Amazon S3 + Vectors. + + For information about connecting Unstructured to Amazon S3 Vectors as a destination only, see + [S3 Vectors](/ui/destinations/s3-vectors). 
+ + import FirstTimeUISourceConnector from '/snippets/general-shared-text/first-time-ui-source-connector.mdx'; From 29c3412f397bc0fff934e4bf9d59296f9d3af8d9 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Tue, 3 Mar 2026 10:36:12 -0800 Subject: [PATCH 2/2] Added code examples --- .../workflow/destinations/s3-vectors.mdx | 24 ++------------ .../s3_vectors_rest_create.mdx | 25 +++++++++++++++ .../destination_connectors/s3_vectors_sdk.mdx | 32 +++++++++++++++++++ .../s3-vectors-api-placeholders.mdx | 11 +++++++ .../s3-vectors-platform.mdx | 4 +-- snippets/general-shared-text/s3-vectors.mdx | 2 +- 6 files changed, 74 insertions(+), 24 deletions(-) create mode 100644 snippets/destination_connectors/s3_vectors_rest_create.mdx create mode 100644 snippets/destination_connectors/s3_vectors_sdk.mdx create mode 100644 snippets/general-shared-text/s3-vectors-api-placeholders.mdx diff --git a/api-reference/workflow/destinations/s3-vectors.mdx b/api-reference/workflow/destinations/s3-vectors.mdx index 5043aa23..4c346e85 100644 --- a/api-reference/workflow/destinations/s3-vectors.mdx +++ b/api-reference/workflow/destinations/s3-vectors.mdx @@ -21,30 +21,12 @@ import s3VectorsPrerequisites from '/snippets/general-shared-text/s3-vectors.mdx -## Add an access policy to an existing S3 Vectors bucket - -import S3VectorsBucketPolicy from '/snippets/general-shared-text/s3-vectors-bucket-policy.mdx'; - - - -## Create an S3 Vectors bucket with AWS CloudFormation - -import S3VectorsBucketCloudFormation from '/snippets/general-shared-text/s3-vectors-cf-setup.mdx'; - - - -## Create an S3 Vectors bucket with the AWS CLI - -import S3VectorsBucketCLI from '/snippets/general-shared-text/s3-vectors-cli-setup.mdx'; - - - ## Create the destination connector -To create an S3 Vectorsdestination connector, see the following examples. +To create an S3 Vectors destination connector, see the following examples. 
-import s3VectorsSDK from '/snippets/destination_connectors/s3-vectors_sdk.mdx'; -import s3VectorsAPIRESTCreate from '/snippets/destination_connectors/s3-vectors_rest_create.mdx'; +import s3VectorsSDK from '/snippets/destination_connectors/s3_vectors_sdk.mdx'; +import s3VectorsAPIRESTCreate from '/snippets/destination_connectors/s3_vectors_rest_create.mdx'; diff --git a/snippets/destination_connectors/s3_vectors_rest_create.mdx b/snippets/destination_connectors/s3_vectors_rest_create.mdx new file mode 100644 index 00000000..6ae597b4 --- /dev/null +++ b/snippets/destination_connectors/s3_vectors_rest_create.mdx @@ -0,0 +1,25 @@ +```bash curl +curl --request 'POST' --location \ +"$UNSTRUCTURED_API_URL/destinations" \ +--header 'accept: application/json' \ +--header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ +--header 'content-type: application/json' \ +--data \ +'{ + "name": "", + "type": "s3_vectors", + "config": { + "region": "", + "access_config": { + "key": "", + "secret": "", + "token": "" + }, + "ambient_credentials": "true|false", + "vector_bucket_name": "", + "index_name": "", + "key_prefix": "", + "batch_size": + } +}' +``` \ No newline at end of file diff --git a/snippets/destination_connectors/s3_vectors_sdk.mdx b/snippets/destination_connectors/s3_vectors_sdk.mdx new file mode 100644 index 00000000..8cdb6f29 --- /dev/null +++ b/snippets/destination_connectors/s3_vectors_sdk.mdx @@ -0,0 +1,32 @@ +```python Python SDK +import os + +from unstructured_client import UnstructuredClient +from unstructured_client.models.operations import CreateDestinationRequest +from unstructured_client.models.shared import CreateDestinationConnector + +with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client: + response = client.destinations.create_destination( + request=CreateDestinationRequest( + create_destination_connector=CreateDestinationConnector( + name="", + type="s3_vectors", + config={ + "region": "", + "access_config": { + "key": "", + 
"secret": "", + "token": "" + }, + "ambient_credentials": "true|false", + "vector_bucket_name": "", + "index_name": "", + "key_prefix": "", + "batch_size": + } + ) + ) + ) + + print(response.destination_connector_information) +``` \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors-api-placeholders.mdx b/snippets/general-shared-text/s3-vectors-api-placeholders.mdx new file mode 100644 index 00000000..358097da --- /dev/null +++ b/snippets/general-shared-text/s3-vectors-api-placeholders.mdx @@ -0,0 +1,11 @@ +- `` (_required_) - A unique name for this connector. +- `` (_required_): The AWS Region (such as `us-east-1`) of the target Amazon S3 Vectors bucket. +- `` (_required_): The AWS access key ID for the target AWS IAM principal that has the appropriate access to the target bucket. +- `` (_required_): The AWS secret access key for the corresponding AWS access key ID. +- `` (_required_): The name of the target bucket. +- `` (_required_): The name of the target index in the bucket. +- ``: The maximum number of vectors to generate a single batch. The maximum is `500`. The default is `100` if not otherwise specified. +- ``: Some string to prepend to each vector key. Prepending a string to each vector key can be useful for distinguishing between different + datasets in the same bucket. + Learn more about [vector keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-vectors.html). + The default is to not prepend a string to each vector key, if this value is not otherwise specified. \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors-platform.mdx b/snippets/general-shared-text/s3-vectors-platform.mdx index b89fe79b..5ba649e0 100644 --- a/snippets/general-shared-text/s3-vectors-platform.mdx +++ b/snippets/general-shared-text/s3-vectors-platform.mdx @@ -6,5 +6,5 @@ Fill in the following fields: - **Secret** (_required_): The AWS secret access key for the corresponding AWS access key ID. 
- **Vector Bucket Name** (_required_): The name of the target bucket. - **Index Name** (_required_): The name of the target index in the bucket. -= **Batch Size**: The maximum number of vectors to generate a single batch. The maximum is `500`. The default is `100` if not otherwise specified. -- **Key Prefix**: A string prepend to each vector key. The default is to not prepend a string to each vector key, if this value is not otherwise specified. \ No newline at end of file +- **Batch Size**: The maximum number of vectors to generate in a single batch. The maximum is `500`. The default is `100` if not otherwise specified. +- **Key Prefix**: Some string to prepend to each vector key. The default is to not prepend a string to each vector key, if this value is not otherwise specified. \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors.mdx b/snippets/general-shared-text/s3-vectors.mdx index 4adec991..b44b9e82 100644 --- a/snippets/general-shared-text/s3-vectors.mdx +++ b/snippets/general-shared-text/s3-vectors.mdx @@ -82,7 +82,7 @@ } ``` - [Learn more about these S3 Vectors access permissions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-permissions.html). + [Learn more about these S3 Vectors access permissions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-access-management.html). - Learn how to attach an access policy to an IAM [user](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_change-permissions.html#users_change_permissions-add-console), [group](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_groups_manage_attach-policy.html),