diff --git a/api-reference/workflow/destinations/overview.mdx b/api-reference/workflow/destinations/overview.mdx index 5f8679a3..a54c67e1 100644 --- a/api-reference/workflow/destinations/overview.mdx +++ b/api-reference/workflow/destinations/overview.mdx @@ -46,6 +46,7 @@ For the list of specific settings, see: - [Redis](/api-reference/workflow/destinations/redis) (`REDIS` for the Python SDK or `redis` for `curl` or Postman) - [Snowflake](/api-reference/workflow/destinations/snowflake) (`SNOWFLAKE` for the Python SDK or `snowflake` for `curl` or Postman) - [S3](/api-reference/workflow/destinations/s3) (`S3` for the Python SDK or `s3` for `curl` or Postman) +- [S3 Vectors](/api-reference/workflow/destinations/s3-vectors) (`S3_VECTORS` for the Python SDK or `s3_vectors` for `curl` or Postman) - [Teradata](/api-reference/workflow/destinations/teradata-sql) (`TERADATA` for the Python SDK or `teradata` for `curl` or Postman) - [Weaviate](/api-reference/workflow/destinations/weaviate) (`WEAVIATE` for the Python SDK or `weaviate` for `curl` or Postman) diff --git a/api-reference/workflow/destinations/s3-vectors.mdx b/api-reference/workflow/destinations/s3-vectors.mdx new file mode 100644 index 00000000..4c346e85 --- /dev/null +++ b/api-reference/workflow/destinations/s3-vectors.mdx @@ -0,0 +1,41 @@ +--- +title: S3 Vectors +--- + + + This article covers connecting Unstructured to Amazon S3 Vectors. + + For information about connecting Unstructured to Amazon S3 without support for Amazon S3 Vectors instead, see + [S3](/api-reference/workflow/destinations/s3). + + +import FirstTimeAPIDestinationConnector from '/snippets/general-shared-text/first-time-api-destination-connector.mdx'; + + + +Send processed data from Unstructured to Amazon S3 Vectors. + +The requirements are as follows. 
+ +import s3VectorsPrerequisites from '/snippets/general-shared-text/s3-vectors.mdx'; + + + +## Create the destination connector + +To create an S3 Vectors destination connector, see the following examples. + +import s3VectorsSDK from '/snippets/destination_connectors/s3_vectors_sdk.mdx'; +import s3VectorsAPIRESTCreate from '/snippets/destination_connectors/s3_vectors_rest_create.mdx'; + + + + + + +Replace the preceding placeholders as follows: + +import s3VectorsAPIPlaceholders from '/snippets/general-shared-text/s3-vectors-api-placeholders.mdx'; + + + diff --git a/docs.json b/docs.json index 3f7b44e0..d622bc05 100644 --- a/docs.json +++ b/docs.json @@ -94,6 +94,7 @@ "ui/destinations/qdrant", "ui/destinations/redis", "ui/destinations/s3", + "ui/destinations/s3-vectors", "ui/destinations/snowflake", "ui/destinations/teradata-sql", "ui/destinations/weaviate" @@ -211,6 +212,7 @@ "api-reference/workflow/destinations/qdrant", "api-reference/workflow/destinations/redis", "api-reference/workflow/destinations/s3", + "api-reference/workflow/destinations/s3-vectors", "api-reference/workflow/destinations/snowflake", "api-reference/workflow/destinations/teradata-sql", "api-reference/workflow/destinations/weaviate" diff --git a/snippets/destination_connectors/s3_vectors_rest_create.mdx b/snippets/destination_connectors/s3_vectors_rest_create.mdx new file mode 100644 index 00000000..6ae597b4 --- /dev/null +++ b/snippets/destination_connectors/s3_vectors_rest_create.mdx @@ -0,0 +1,25 @@ +```bash curl +curl --request 'POST' --location \ +"$UNSTRUCTURED_API_URL/destinations" \ +--header 'accept: application/json' \ +--header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ +--header 'content-type: application/json' \ +--data \ +'{ + "name": "<name>", + "type": "s3_vectors", + "config": { + "region": "<region>", + "access_config": { + "key": "<key>", + "secret": "<secret>", + "token": "<token>" + }, + "ambient_credentials": "true|false", + "vector_bucket_name": "<vector-bucket-name>", + "index_name": "<index-name>", + "key_prefix": "<key-prefix>", + 
"batch_size": <batch-size> + } +}' +``` \ No newline at end of file diff --git a/snippets/destination_connectors/s3_vectors_sdk.mdx b/snippets/destination_connectors/s3_vectors_sdk.mdx new file mode 100644 index 00000000..8cdb6f29 --- /dev/null +++ b/snippets/destination_connectors/s3_vectors_sdk.mdx @@ -0,0 +1,32 @@ +```python Python SDK +import os + +from unstructured_client import UnstructuredClient +from unstructured_client.models.operations import CreateDestinationRequest +from unstructured_client.models.shared import CreateDestinationConnector + +with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client: + response = client.destinations.create_destination( + request=CreateDestinationRequest( + create_destination_connector=CreateDestinationConnector( + name="<name>", + type="s3_vectors", + config={ + "region": "<region>", + "access_config": { + "key": "<key>", + "secret": "<secret>", + "token": "<token>" + }, + "ambient_credentials": "true|false", + "vector_bucket_name": "<vector-bucket-name>", + "index_name": "<index-name>", + "key_prefix": "<key-prefix>", + "batch_size": <batch-size> + } + ) + ) + ) + + print(response.destination_connector_information) +``` \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors-api-placeholders.mdx b/snippets/general-shared-text/s3-vectors-api-placeholders.mdx new file mode 100644 index 00000000..358097da --- /dev/null +++ b/snippets/general-shared-text/s3-vectors-api-placeholders.mdx @@ -0,0 +1,11 @@ +- `<name>` (_required_): A unique name for this connector. +- `<region>` (_required_): The AWS Region (such as `us-east-1`) of the target Amazon S3 Vectors bucket. +- `<key>` (_required_): The AWS access key ID for the target AWS IAM principal that has the appropriate access to the target bucket. +- `<secret>` (_required_): The AWS secret access key for the corresponding AWS access key ID. +- `<vector-bucket-name>` (_required_): The name of the target bucket. +- `<index-name>` (_required_): The name of the target index in the bucket. +- `<batch-size>`: The maximum number of vectors to include in a single batch. The maximum allowed value is `500`. 
The default is `100` if not otherwise specified. +- `<key-prefix>`: Some string to prepend to each vector key. Prepending a string to each vector key can be useful for distinguishing between different + datasets in the same bucket. + Learn more about [vector keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-vectors.html). + The default is to not prepend a string to each vector key, if this value is not otherwise specified. \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors-platform.mdx b/snippets/general-shared-text/s3-vectors-platform.mdx new file mode 100644 index 00000000..5ba649e0 --- /dev/null +++ b/snippets/general-shared-text/s3-vectors-platform.mdx @@ -0,0 +1,10 @@ +Fill in the following fields: + +- **Name** (_required_): A unique name for this connector. +- **Region** (_required_): The AWS Region (such as `us-east-1`) of the target Amazon S3 Vectors bucket. +- **Key** (_required_): The AWS access key ID for the target AWS IAM principal that has the appropriate access to the target bucket. +- **Secret** (_required_): The AWS secret access key for the corresponding AWS access key ID. +- **Vector Bucket Name** (_required_): The name of the target bucket. +- **Index Name** (_required_): The name of the target index in the bucket. +- **Batch Size**: The maximum number of vectors to include in a single batch. The maximum allowed value is `500`. The default is `100` if not otherwise specified. +- **Key Prefix**: Some string to prepend to each vector key. The default is to not prepend a string to each vector key, if this value is not otherwise specified. \ No newline at end of file diff --git a/snippets/general-shared-text/s3-vectors.mdx b/snippets/general-shared-text/s3-vectors.mdx new file mode 100644 index 00000000..b44b9e82 --- /dev/null +++ b/snippets/general-shared-text/s3-vectors.mdx @@ -0,0 +1,92 @@ +- An Amazon S3 Vectors bucket. 
+ + - Learn how to [create an S3 Vectors bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-buckets-create.html). + - Learn how to [get the name of an existing S3 Vectors bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-buckets-list.html). + +- The AWS Region (such as `us-east-1`) of the target S3 Vectors bucket. Learn how to [get the Region of an existing S3 Vectors bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-buckets-details.html). +- An index for the target S3 Vectors bucket. + + - Learn how to [create an index](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-create-index.html). + - Learn how to [get the name of an existing index](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-index-list.html). + + When creating an index, be sure to specify these settings: + + - **Vector index name** can be any allowed name pattern. + - For **Dimension**, only specify a number that is supported by Unstructured's available embedding models. + - For **Distance metric**, only specify **Cosine**. + - For **Metadata configuration** under **Additional settings**, Unstructured recommends that you specify the following 10 keys for **Non-filterable metadata**: + + - `text` + - `link_urls` + - `link_texts` + - `coordinates-points` + - `coordinates-system` + - `data_source-url` + - `data_source-record_locator` + - `data_source-date_created` + - `data_source-date_modified` + - `data_source-date_processed` + + - There are no Unstructured-specific requirements for **Encryption** or **Tags**. + + [Learn more about these index settings](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-create-index.html). + +- For the target index, the number of dimensions that are generated. + Learn how to [get the index's number of dimensions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-index-list.html). 
+ +- The AWS access key ID and the AWS secret access key for the target AWS IAM principal (such as an IAM user or group) that has the appropriate access to the S3 Vectors bucket. + + - If you use identity-based policies to control access, the target IAM principal must have at minimum the following access permissions. Replace the following placeholders: + + - Replace `<region>` with the AWS Region short ID of the target S3 Vectors bucket. + - Replace `<account-id>` with the AWS account ID of the target S3 Vectors bucket. + - Replace `<bucket-name>` with the name of the target S3 Vectors bucket. + - Replace `<index-name>` with the name of the target index. + + ```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AccountBucketListing", + "Effect": "Allow", + "Action": [ + "s3vectors:ListVectorBuckets" + ], + "Resource": "*" + }, + { + "Sid": "AllowBucketAccess", + "Effect": "Allow", + "Action": [ + "s3vectors:GetVectorBucket", + "s3vectors:ListIndexes" + ], + "Resource": "arn:aws:s3vectors:<region>:<account-id>:bucket/<bucket-name>" + }, + { + "Sid": "AllowIndexAccess", + "Effect": "Allow", + "Action": [ + "s3vectors:ListIndexes", + "s3vectors:GetIndex", + "s3vectors:ListVectors", + "s3vectors:QueryVectors", + "s3vectors:PutVectors", + "s3vectors:GetVectors", + "s3vectors:DeleteVectors" + ], + "Resource": "arn:aws:s3vectors:<region>:<account-id>:bucket/<bucket-name>/index/<index-name>" + } + ] + } + ``` + + [Learn more about these S3 Vectors access permissions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-access-management.html). + + - Learn how to attach an access policy to an IAM [user](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_change-permissions.html#users_change_permissions-add-console), + [group](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_groups_manage_attach-policy.html), + or [role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user.html). 
+ - Learn how to [create and manage AWS access key IDs and their related AWS secret access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). + - Learn how to [switch from an IAM user to a role for temporary access](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_manage-assume.html). + diff --git a/ui/connectors.mdx b/ui/connectors.mdx index b2df9c27..dfa7ab4a 100644 --- a/ui/connectors.mdx +++ b/ui/connectors.mdx @@ -64,6 +64,7 @@ If your source is not listed here, you might still be able to connect Unstructur - [Qdrant](/ui/destinations/qdrant) - [Redis](/ui/destinations/redis) - [S3](/ui/destinations/s3) +- [S3 Vectors](/ui/destinations/s3-vectors) - [Snowflake](/ui/destinations/snowflake) - [Teradata](/ui/destinations/teradata-sql) - [Weaviate](/ui/destinations/weaviate) diff --git a/ui/destinations/overview.mdx b/ui/destinations/overview.mdx index ac6cf894..07814ce9 100644 --- a/ui/destinations/overview.mdx +++ b/ui/destinations/overview.mdx @@ -51,6 +51,7 @@ To create a destination connector: - [Qdrant](/ui/destinations/qdrant) - [Redis](/ui/destinations/redis) - [S3](/ui/destinations/s3) + - [S3 Vectors](/ui/destinations/s3-vectors) - [Snowflake](/ui/destinations/snowflake) - [Teradata](/ui/destinations/teradata-sql) - [Weaviate](/ui/destinations/weaviate) diff --git a/ui/destinations/s3-vectors.mdx b/ui/destinations/s3-vectors.mdx new file mode 100644 index 00000000..86cbeb49 --- /dev/null +++ b/ui/destinations/s3-vectors.mdx @@ -0,0 +1,42 @@ +--- +title: S3 Vectors +--- + + + This article covers connecting Unstructured to Amazon S3 Vectors. + + For information about connecting Unstructured to Amazon S3 without support for Amazon S3 Vectors instead, see + [S3](/ui/destinations/s3). + + +import FirstTimeUIDestinationConnector from '/snippets/general-shared-text/first-time-ui-destination-connector.mdx'; + + + +Send processed data from Unstructured to Amazon S3 Vectors. 
+ +The requirements are as follows. + +import S3VectorsPrerequisites from '/snippets/general-shared-text/s3-vectors.mdx'; + + + +## Create the destination connector + +To create the destination connector: + +1. On the sidebar, click **Connectors**. +2. Click **Destinations**. +3. Click **New** or **Create Connector**. +4. Give the connector some unique **Name**. +5. In the **Provider** area, click **Amazon S3 Vectors**. +6. Click **Continue**. +7. Follow the on-screen instructions to fill in the fields as described later on this page. +8. Click **Save and Test**. + +import S3VectorsFields from '/snippets/general-shared-text/s3-vectors-platform.mdx'; + + + + + diff --git a/ui/destinations/s3.mdx b/ui/destinations/s3.mdx index 13f1fc08..77f1966e 100644 --- a/ui/destinations/s3.mdx +++ b/ui/destinations/s3.mdx @@ -2,6 +2,14 @@ title: S3 --- + + This article covers connecting Unstructured to Amazon S3 without support for Amazon S3 + Vectors. + + For information about connecting Unstructured to Amazon S3 Vectors instead, see + [S3 Vectors](/ui/destinations/s3-vectors). + + import FirstTimeUIDestinationConnector from '/snippets/general-shared-text/first-time-ui-destination-connector.mdx'; diff --git a/ui/sources/s3.mdx b/ui/sources/s3.mdx index c375872d..66cc5325 100644 --- a/ui/sources/s3.mdx +++ b/ui/sources/s3.mdx @@ -2,6 +2,14 @@ title: S3 --- + + This article covers connecting Unstructured to Amazon S3 as a source without support for Amazon S3 + Vectors. + + For information about connecting Unstructured to Amazon S3 Vectors as a destination only, see + [S3 Vectors](/ui/destinations/s3-vectors). + + import FirstTimeUISourceConnector from '/snippets/general-shared-text/first-time-ui-source-connector.mdx';