From d20978b466c9958f29538318e20f25d568e424e4 Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Wed, 17 Jun 2026 22:39:39 +0000 Subject: [PATCH 1/7] Add Terraform example for AWS deployment Deploys loreserver on ECS Fargate with S3/DynamoDB storage. DynamoDB schemas and IAM permissions verified against lore-aws source. Signed-off-by: Sam Biggins --- examples/aws/.gitignore | 6 + examples/aws/README.md | 89 +++++++++++++++ examples/aws/compute.tf | 87 +++++++++++++++ examples/aws/iam.tf | 85 ++++++++++++++ examples/aws/main.tf | 16 +++ examples/aws/network.tf | 147 +++++++++++++++++++++++++ examples/aws/outputs.tf | 19 ++++ examples/aws/storage.tf | 152 ++++++++++++++++++++++++++ examples/aws/terraform.tfvars.example | 3 + examples/aws/variables.tf | 21 ++++ examples/aws/versions.tf | 10 ++ 11 files changed, 635 insertions(+) create mode 100644 examples/aws/.gitignore create mode 100644 examples/aws/README.md create mode 100644 examples/aws/compute.tf create mode 100644 examples/aws/iam.tf create mode 100644 examples/aws/main.tf create mode 100644 examples/aws/network.tf create mode 100644 examples/aws/outputs.tf create mode 100644 examples/aws/storage.tf create mode 100644 examples/aws/terraform.tfvars.example create mode 100644 examples/aws/variables.tf create mode 100644 examples/aws/versions.tf diff --git a/examples/aws/.gitignore b/examples/aws/.gitignore new file mode 100644 index 0000000..7d126a9 --- /dev/null +++ b/examples/aws/.gitignore @@ -0,0 +1,6 @@ +*.tfstate +*.tfstate.* +*.tfplan +.terraform/ +.terraform.lock.hcl +terraform.tfvars diff --git a/examples/aws/README.md b/examples/aws/README.md new file mode 100644 index 0000000..313b72a --- /dev/null +++ b/examples/aws/README.md @@ -0,0 +1,89 @@ +# Lore on AWS + +Terraform configuration that deploys a Lore server on AWS with durable S3/DynamoDB storage using ECS Fargate. 
+ +## What this creates + +- VPC with public and private subnets (2 AZs) +- S3 bucket for fragment storage (immutable store) +- 4 DynamoDB tables (fragments, metadata, mutable store, locks) +- ECS Fargate service running the loreserver container +- VPC endpoints for S3 and DynamoDB (reduces NAT costs) +- CloudWatch log group + +## Prerequisites + +- [Terraform](https://developer.hashicorp.com/terraform/install) >= 1.5 +- AWS credentials configured (`aws configure` or environment variables) +- A `loreserver` container image in ECR — build from the repo root: + +```sh +docker build -f lore-server/Dockerfile -t loreserver . + +aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin <account-id>.dkr.ecr.us-west-2.amazonaws.com +aws ecr create-repository --repository-name loreserver --region us-west-2 +docker tag loreserver:latest <account-id>.dkr.ecr.us-west-2.amazonaws.com/loreserver:latest +docker push <account-id>.dkr.ecr.us-west-2.amazonaws.com/loreserver:latest +``` + +## Deploy + +```sh +cd examples/aws +cp terraform.tfvars.example terraform.tfvars +# Edit terraform.tfvars — set your container_image URI and allowed_cidrs +terraform init +terraform apply +``` + +## Connect + +Get the task IP (Fargate assigns a private IP in the VPC): + +```sh +TASK_ARN=$(aws ecs list-tasks --cluster lore-cluster --service-name lore --query 'taskArns[0]' --output text) +TASK_IP=$(aws ecs describe-tasks --cluster lore-cluster --tasks "$TASK_ARN" \ + --query 'tasks[0].attachments[0].details[?name==`privateIPv4Address`].value' --output text) +echo "$TASK_IP" +``` + +The server generates an ephemeral self-signed certificate on startup. For local testing, skip TLS verification or use `lore://` (plain gRPC, QUIC still has TLS): + +```sh +lore clone lore://${TASK_IP}:41337/my-repo +``` + +For production, configure real TLS certificates (see Customize below) and use `lores://`. 
+ +## Verify + +Check the service is running: + +```sh +aws ecs describe-services --cluster lore-cluster --services lore \ + --query 'services[0].{status:status,running:runningCount}' +``` + +Check server logs: + +```sh +aws logs tail /ecs/lore --since 5m +``` + +## Customize + +This example uses the simplest viable configuration. For production: + +- **TLS** — mount real certificates and set `LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE` / `PKEY_FILE` (and the same for `GRPC`). See [Server configuration reference](https://epicgames.github.io/lore/reference/lore-server-config/#certificate-block). +- **Auth** — configure `LORE__SERVER__AUTH__JWK__ENDPOINT` to validate JWTs. See [Authentication](https://epicgames.github.io/lore/reference/lore-server-config/#authentication). +- **Caching** — switch from Fargate to EC2 with NVMe instances and use `LORE__IMMUTABLE_STORE__MODE=composite` for a local cache in front of S3. +- **Replication** — add edge nodes with `LORE__IMMUTABLE_STORE__MODE=replicated` for multi-region. See [Topology](https://epicgames.github.io/lore/reference/lore-server-config/#topology-settings). +- **HMAC** — set `LORE__SERVER__HTTP__PRESIGNED_URL_HMAC_KEY` (hex, ≥32 bytes) to enable presigned URLs for direct client-to-S3 transfers. + +## Destroy + +```sh +terraform destroy +``` + +Teardown takes 2–3 minutes (VPC, NAT gateway deletion). 
diff --git a/examples/aws/compute.tf b/examples/aws/compute.tf new file mode 100644 index 0000000..7e630c7 --- /dev/null +++ b/examples/aws/compute.tf @@ -0,0 +1,87 @@ +# ============================================================================= +# ECS Cluster + Fargate Service +# ============================================================================= + +resource "aws_ecs_cluster" "this" { + name = "${local.name}-cluster" + tags = local.tags +} + +resource "aws_cloudwatch_log_group" "lore" { + name = "/ecs/${local.name}" + retention_in_days = 7 + tags = local.tags +} + +resource "aws_ecs_task_definition" "lore" { + family = local.name + requires_compatibilities = ["FARGATE"] + network_mode = "awsvpc" + cpu = "1024" + memory = "2048" + execution_role_arn = aws_iam_role.execution.arn + task_role_arn = aws_iam_role.task.arn + + container_definitions = jsonencode([{ + name = "loreserver" + image = var.container_image + essential = true + + portMappings = [ + { containerPort = local.port_quic_grpc, protocol = "tcp" }, + { containerPort = local.port_quic_grpc, protocol = "udp" }, + { containerPort = local.port_http, protocol = "tcp" }, + ] + + environment = [ + { name = "LORE_ENV", value = "docker" }, + { name = "LORE_CONFIG_PATH", value = "/etc/lore/config" }, + + # Storage: S3 + DynamoDB via the aws plugin + { name = "LORE__IMMUTABLE_STORE__MODE", value = "aws" }, + { name = "LORE__MUTABLE_STORE__MODE", value = "aws" }, + { name = "LORE__LOCK_STORE__MODE", value = "aws" }, + + # AWS plugin config — resource names from Terraform + { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__S3_BUCKET", value = aws_s3_bucket.fragments.id }, + { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__DYNAMODB_FRAGMENTS_TABLE", value = aws_dynamodb_table.fragments.name }, + { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__DYNAMODB_METADATA_TABLE", value = aws_dynamodb_table.metadata.name }, + { name = "LORE__PLUGINS__AWS__MUTABLE_STORE__DYNAMODB_TABLE", value = 
aws_dynamodb_table.mutable.name }, + { name = "LORE__PLUGINS__AWS__LOCK_STORE__DYNAMODB_TABLE", value = aws_dynamodb_table.locks.name }, + ] + + # TLS: The server generates an ephemeral self-signed certificate when no + # certificate is configured. For production, mount real certs and set: + # LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE=/certs/cert.pem + # LORE__SERVER__QUIC__CERTIFICATE__PKEY_FILE=/certs/key.pem + # LORE__SERVER__GRPC__CERTIFICATE__CERT_FILE=/certs/cert.pem + # LORE__SERVER__GRPC__CERTIFICATE__PKEY_FILE=/certs/key.pem + + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.lore.name + "awslogs-region" = var.region + "awslogs-stream-prefix" = "lore" + } + } + }]) + + tags = local.tags +} + +resource "aws_ecs_service" "lore" { + name = local.name + cluster = aws_ecs_cluster.this.id + task_definition = aws_ecs_task_definition.lore.arn + desired_count = 1 + launch_type = "FARGATE" + + network_configuration { + subnets = aws_subnet.private[*].id + security_groups = [aws_security_group.lore.id] + assign_public_ip = false + } + + tags = local.tags +} diff --git a/examples/aws/iam.tf b/examples/aws/iam.tf new file mode 100644 index 0000000..b976c05 --- /dev/null +++ b/examples/aws/iam.tf @@ -0,0 +1,85 @@ +# ============================================================================= +# IAM — ECS task role (S3 + DynamoDB access) and execution role (ECR + logs) +# ============================================================================= + +# Task role — what the loreserver container can do +resource "aws_iam_role" "task" { + name_prefix = "${local.name}-task-" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "ecs-tasks.amazonaws.com" } + }] + }) + tags = local.tags +} + +resource "aws_iam_role_policy" "task_s3" { + name_prefix = "s3-" + role = aws_iam_role.task.id + policy = jsonencode({ + Version = 
"2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:ListBucketVersions", + ] + Resource = [ + aws_s3_bucket.fragments.arn, + "${aws_s3_bucket.fragments.arn}/*", + ] + }] + }) +} + +resource "aws_iam_role_policy" "task_dynamodb" { + name_prefix = "dynamodb-" + role = aws_iam_role.task.id + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "dynamodb:GetItem", + "dynamodb:PutItem", + "dynamodb:DeleteItem", + "dynamodb:Query", + "dynamodb:BatchGetItem", + "dynamodb:DescribeTable", + "dynamodb:TransactWriteItems", + ] + Resource = [ + aws_dynamodb_table.fragments.arn, + aws_dynamodb_table.metadata.arn, + aws_dynamodb_table.mutable.arn, + aws_dynamodb_table.locks.arn, + "${aws_dynamodb_table.locks.arn}/index/*", + ] + }] + }) +} + +# Execution role — what ECS needs to start the task (pull image, write logs, read secrets) +resource "aws_iam_role" "execution" { + name_prefix = "${local.name}-exec-" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "ecs-tasks.amazonaws.com" } + }] + }) + tags = local.tags +} + +resource "aws_iam_role_policy_attachment" "execution_ecr" { + role = aws_iam_role.execution.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" +} diff --git a/examples/aws/main.tf b/examples/aws/main.tf new file mode 100644 index 0000000..7d0122f --- /dev/null +++ b/examples/aws/main.tf @@ -0,0 +1,16 @@ +provider "aws" { + region = var.region +} + +locals { + name = var.name + tags = { ManagedBy = "terraform", Project = "lore" } + + # Ports — match lore-server/config/default.toml + port_quic_grpc = 41337 # QUIC (UDP) + gRPC (TCP) + port_http = 41339 # Health checks, presigned URLs +} + +data "aws_availability_zones" "available" { + state = "available" +} diff --git a/examples/aws/network.tf 
b/examples/aws/network.tf new file mode 100644 index 0000000..9765a73 --- /dev/null +++ b/examples/aws/network.tf @@ -0,0 +1,147 @@ +# ============================================================================= +# VPC — minimal 2-AZ layout with public + private subnets +# ============================================================================= + +resource "aws_vpc" "this" { + cidr_block = "10.0.0.0/16" + enable_dns_hostnames = true + enable_dns_support = true + tags = merge(local.tags, { Name = "${local.name}-vpc" }) +} + +resource "aws_internet_gateway" "this" { + vpc_id = aws_vpc.this.id + tags = merge(local.tags, { Name = "${local.name}-igw" }) +} + +resource "aws_subnet" "public" { + count = 2 + vpc_id = aws_vpc.this.id + cidr_block = cidrsubnet(aws_vpc.this.cidr_block, 8, count.index) + availability_zone = data.aws_availability_zones.available.names[count.index] + map_public_ip_on_launch = true + tags = merge(local.tags, { Name = "${local.name}-public-${count.index}" }) +} + +resource "aws_subnet" "private" { + count = 2 + vpc_id = aws_vpc.this.id + cidr_block = cidrsubnet(aws_vpc.this.cidr_block, 8, count.index + 10) + availability_zone = data.aws_availability_zones.available.names[count.index] + tags = merge(local.tags, { Name = "${local.name}-private-${count.index}" }) +} + +resource "aws_eip" "nat" { + domain = "vpc" + tags = merge(local.tags, { Name = "${local.name}-nat-eip" }) +} + +resource "aws_nat_gateway" "this" { + allocation_id = aws_eip.nat.id + subnet_id = aws_subnet.public[0].id + tags = merge(local.tags, { Name = "${local.name}-nat" }) +} + +resource "aws_route_table" "public" { + vpc_id = aws_vpc.this.id + tags = merge(local.tags, { Name = "${local.name}-public-rt" }) +} + +resource "aws_route" "public_internet" { + route_table_id = aws_route_table.public.id + destination_cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.this.id +} + +resource "aws_route_table_association" "public" { + count = 2 + subnet_id = 
aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id +} + +resource "aws_route_table" "private" { + vpc_id = aws_vpc.this.id + tags = merge(local.tags, { Name = "${local.name}-private-rt" }) +} + +resource "aws_route" "private_nat" { + route_table_id = aws_route_table.private.id + destination_cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.this.id +} + +resource "aws_route_table_association" "private" { + count = 2 + subnet_id = aws_subnet.private[count.index].id + route_table_id = aws_route_table.private.id +} + +# ============================================================================= +# Security Group — Lore server +# ============================================================================= + +resource "aws_security_group" "lore" { + name_prefix = "${local.name}-server-" + description = "Lore server ports" + vpc_id = aws_vpc.this.id + tags = merge(local.tags, { Name = "${local.name}-server-sg" }) + + lifecycle { create_before_destroy = true } +} + +# Client access: QUIC (UDP) + gRPC (TCP) on 41337 +resource "aws_vpc_security_group_ingress_rule" "client_quic" { + for_each = toset(var.allowed_cidrs) + security_group_id = aws_security_group.lore.id + from_port = local.port_quic_grpc + to_port = local.port_quic_grpc + ip_protocol = "udp" + cidr_ipv4 = each.value + description = "Client QUIC" +} + +resource "aws_vpc_security_group_ingress_rule" "client_grpc" { + for_each = toset(var.allowed_cidrs) + security_group_id = aws_security_group.lore.id + from_port = local.port_quic_grpc + to_port = local.port_quic_grpc + ip_protocol = "tcp" + cidr_ipv4 = each.value + description = "Client gRPC" +} + +# HTTP health checks + presigned URLs +resource "aws_vpc_security_group_ingress_rule" "client_http" { + for_each = toset(var.allowed_cidrs) + security_group_id = aws_security_group.lore.id + from_port = local.port_http + to_port = local.port_http + ip_protocol = "tcp" + cidr_ipv4 = each.value + description = "Client HTTP" +} + 
+resource "aws_vpc_security_group_egress_rule" "all" { + security_group_id = aws_security_group.lore.id + ip_protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + description = "All outbound" +} + +# ============================================================================= +# VPC Endpoints — S3 and DynamoDB (avoid NAT costs for AWS API traffic) +# ============================================================================= + +resource "aws_vpc_endpoint" "s3" { + vpc_id = aws_vpc.this.id + service_name = "com.amazonaws.${var.region}.s3" + route_table_ids = [aws_route_table.private.id] + tags = merge(local.tags, { Name = "${local.name}-s3-endpoint" }) +} + +resource "aws_vpc_endpoint" "dynamodb" { + vpc_id = aws_vpc.this.id + service_name = "com.amazonaws.${var.region}.dynamodb" + route_table_ids = [aws_route_table.private.id] + tags = merge(local.tags, { Name = "${local.name}-dynamodb-endpoint" }) +} diff --git a/examples/aws/outputs.tf b/examples/aws/outputs.tf new file mode 100644 index 0000000..f2a6f85 --- /dev/null +++ b/examples/aws/outputs.tf @@ -0,0 +1,19 @@ +output "cluster_name" { + description = "ECS cluster name" + value = aws_ecs_cluster.this.name +} + +output "service_name" { + description = "ECS service name" + value = aws_ecs_service.lore.name +} + +output "s3_bucket" { + description = "S3 bucket for fragment storage" + value = aws_s3_bucket.fragments.id +} + +output "log_group" { + description = "CloudWatch log group" + value = aws_cloudwatch_log_group.lore.name +} diff --git a/examples/aws/storage.tf b/examples/aws/storage.tf new file mode 100644 index 0000000..de8a2ab --- /dev/null +++ b/examples/aws/storage.tf @@ -0,0 +1,152 @@ +# ============================================================================= +# S3 — Fragment payloads (immutable store) +# ============================================================================= + +resource "aws_s3_bucket" "fragments" { + bucket_prefix = "${local.name}-fragments-" + tags = local.tags +} + +resource 
"aws_s3_bucket_versioning" "fragments" { + bucket = aws_s3_bucket.fragments.id + versioning_configuration { status = "Enabled" } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "fragments" { + bucket = aws_s3_bucket.fragments.id + rule { + apply_server_side_encryption_by_default { sse_algorithm = "AES256" } + } +} + +resource "aws_s3_bucket_public_access_block" "fragments" { + bucket = aws_s3_bucket.fragments.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +# ============================================================================= +# DynamoDB — Fragment associations +# Key schema from lore-aws/src/store/immutable_store.rs +# ============================================================================= + +resource "aws_dynamodb_table" "fragments" { + name = "${local.name}-fragments" + billing_mode = "PAY_PER_REQUEST" + hash_key = "hash" + range_key = "repository_context" + + attribute { + name = "hash" + type = "B" + } + attribute { + name = "repository_context" + type = "B" + } + + tags = local.tags +} + +# ============================================================================= +# DynamoDB — Fragment metadata (hash-only key, no sort key) +# Key schema from lore-aws/src/store/immutable_store.rs +# ============================================================================= + +resource "aws_dynamodb_table" "metadata" { + name = "${local.name}-metadata" + billing_mode = "PAY_PER_REQUEST" + hash_key = "hash" + + attribute { + name = "hash" + type = "B" + } + + tags = local.tags +} + +# ============================================================================= +# DynamoDB — Mutable store (branch pointers) +# Key schema from lore-aws/src/store/mutable_store.rs +# ============================================================================= + +resource "aws_dynamodb_table" "mutable" { + name = "${local.name}-mutable" + billing_mode = "PAY_PER_REQUEST" + hash_key 
= "repository_id" + range_key = "key" + + attribute { + name = "repository_id" + type = "B" + } + attribute { + name = "key" + type = "B" + } + + tags = local.tags +} + +# ============================================================================= +# DynamoDB — Distributed locks +# Key schema + GSIs from lore-aws/src/store/lock_store.rs +# ============================================================================= + +resource "aws_dynamodb_table" "locks" { + name = "${local.name}-locks" + billing_mode = "PAY_PER_REQUEST" + hash_key = "hash" + range_key = "repositoryBranch" + + attribute { + name = "hash" + type = "B" + } + attribute { + name = "repositoryBranch" + type = "B" + } + attribute { + name = "ownerId" + type = "S" + } + attribute { + name = "repository" + type = "B" + } + attribute { + name = "branch" + type = "B" + } + attribute { + name = "description" + type = "S" + } + + global_secondary_index { + name = "owner-repo-branch" + hash_key = "ownerId" + range_key = "repositoryBranch" + projection_type = "ALL" + } + + global_secondary_index { + name = "repo-branch" + hash_key = "repository" + range_key = "branch" + projection_type = "ALL" + } + + global_secondary_index { + name = "repo-branch-description" + hash_key = "repositoryBranch" + range_key = "description" + projection_type = "ALL" + } + + tags = local.tags +} diff --git a/examples/aws/terraform.tfvars.example b/examples/aws/terraform.tfvars.example new file mode 100644 index 0000000..27a34c0 --- /dev/null +++ b/examples/aws/terraform.tfvars.example @@ -0,0 +1,3 @@ +region = "us-west-2" +container_image = "123456789012.dkr.ecr.us-west-2.amazonaws.com/loreserver:latest" +allowed_cidrs = ["10.0.0.0/8"] diff --git a/examples/aws/variables.tf b/examples/aws/variables.tf new file mode 100644 index 0000000..5cf76b4 --- /dev/null +++ b/examples/aws/variables.tf @@ -0,0 +1,21 @@ +variable "container_image" { + description = "Loreserver container image URI in ECR" + type = string +} + +variable 
"allowed_cidrs" { + description = "CIDR blocks allowed to connect to Lore (e.g., your VPN or office IP)" + type = list(string) +} + +variable "region" { + description = "AWS region" + type = string + default = "us-west-2" +} + +variable "name" { + description = "Name prefix for all resources" + type = string + default = "lore" +} diff --git a/examples/aws/versions.tf b/examples/aws/versions.tf new file mode 100644 index 0000000..00bd111 --- /dev/null +++ b/examples/aws/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_version = ">= 1.5" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0" + } + } +} From a60b9cf76e7f4efa9f2f96c119a6c227bb1b4bb7 Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Wed, 17 Jun 2026 22:50:22 +0000 Subject: [PATCH 2/7] Address PR review: clarify plugin registration and VPC connectivity - Explain that the Dockerfile build auto-registers lore-aws plugin - Document that the task runs in private subnets (VPC access required) - Add ingress to the Customize section for production paths Signed-off-by: Sam Biggins --- examples/aws/README.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/examples/aws/README.md b/examples/aws/README.md index 313b72a..8c872d0 100644 --- a/examples/aws/README.md +++ b/examples/aws/README.md @@ -26,6 +26,11 @@ docker tag loreserver:latest .dkr.ecr.us-west-2.amazonaws.com/lorese docker push .dkr.ecr.us-west-2.amazonaws.com/loreserver:latest ``` +The Dockerfile builds the `loreserver` binary from the workspace, which includes +the `lore-aws` crate. The server's `main()` calls `register_all_plugins()` at +startup, registering the AWS (S3 + DynamoDB) and HashiCorp (Consul) plugins +automatically. No custom binary or fork is needed. + ## Deploy ```sh @@ -38,7 +43,10 @@ terraform apply ## Connect -Get the task IP (Fargate assigns a private IP in the VPC): +The ECS service runs in private subnets. 
You must connect from within the VPC +(e.g., an EC2 instance, VPN, AWS Client VPN, or SSM port-forwarding session). + +Get the task IP: ```sh TASK_ARN=$(aws ecs list-tasks --cluster lore-cluster --service-name lore --query 'taskArns[0]' --output text) @@ -47,13 +55,15 @@ TASK_IP=$(aws ecs describe-tasks --cluster lore-cluster --tasks "$TASK_ARN" \ echo "$TASK_IP" ``` -The server generates an ephemeral self-signed certificate on startup. For local testing, skip TLS verification or use `lore://` (plain gRPC, QUIC still has TLS): +From a host inside the VPC: ```sh lore clone lore://${TASK_IP}:41337/my-repo ``` -For production, configure real TLS certificates (see Customize below) and use `lores://`. +The server generates an ephemeral self-signed certificate on startup. Use +`lore://` (plain gRPC control plane — QUIC data path still uses TLS) or +configure real certificates and use `lores://` (see Customize below). ## Verify @@ -74,6 +84,7 @@ aws logs tail /ecs/lore --since 5m This example uses the simplest viable configuration. For production: +- **Ingress** — add an NLB, AWS Client VPN, or bastion host for access from outside the VPC. - **TLS** — mount real certificates and set `LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE` / `PKEY_FILE` (and the same for `GRPC`). See [Server configuration reference](https://epicgames.github.io/lore/reference/lore-server-config/#certificate-block). - **Auth** — configure `LORE__SERVER__AUTH__JWK__ENDPOINT` to validate JWTs. See [Authentication](https://epicgames.github.io/lore/reference/lore-server-config/#authentication). - **Caching** — switch from Fargate to EC2 with NVMe instances and use `LORE__IMMUTABLE_STORE__MODE=composite` for a local cache in front of S3. 
From 4ce9533b22376f12a0f6fc5ffccf4b05082e7202 Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Wed, 17 Jun 2026 23:44:44 +0000 Subject: [PATCH 3/7] Add s3:DeleteObjectVersion, edge pod with Cloud Map discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add s3:DeleteObjectVersion (required for versioned bucket cleanup) - Add edge pod service with replicated+remote stores via Cloud Map - Add Cloud Map private DNS for edge→primary discovery - Add internal SG rules for node-to-node QUIC+gRPC Signed-off-by: Sam Biggins --- examples/aws/compute.tf | 104 ++++++++++++++++++++++++++++++++++++++++ examples/aws/iam.tf | 1 + examples/aws/main.tf | 5 +- examples/aws/network.tf | 30 ++++++++++++ examples/aws/outputs.tf | 12 ++++- 5 files changed, 149 insertions(+), 3 deletions(-) diff --git a/examples/aws/compute.tf b/examples/aws/compute.tf index 7e630c7..4c3eb97 100644 --- a/examples/aws/compute.tf +++ b/examples/aws/compute.tf @@ -83,5 +83,109 @@ resource "aws_ecs_service" "lore" { assign_public_ip = false } + service_registries { + registry_arn = aws_service_discovery_service.lore.arn + } + + tags = local.tags +} + +# ============================================================================= +# Cloud Map — Service discovery for edge → primary +# ============================================================================= + +resource "aws_service_discovery_private_dns_namespace" "this" { + name = "${local.name}.internal" + vpc = aws_vpc.this.id + tags = local.tags +} + +resource "aws_service_discovery_service" "lore" { + name = "primary" + + dns_config { + namespace_id = aws_service_discovery_private_dns_namespace.this.id + dns_records { + ttl = 10 + type = "A" + } + routing_policy = "MULTIVALUE" + } + + health_check_custom_config { + failure_threshold = 1 + } + + tags = local.tags +} + +# ============================================================================= +# Edge Pod — Caching node with replicated + remote stores 
+# ============================================================================= + +resource "aws_ecs_task_definition" "edge" { + family = "${local.name}-edge" + requires_compatibilities = ["FARGATE"] + network_mode = "awsvpc" + cpu = "1024" + memory = "2048" + execution_role_arn = aws_iam_role.execution.arn + task_role_arn = aws_iam_role.task.arn + + container_definitions = jsonencode([{ + name = "loreserver" + image = var.container_image + essential = true + + portMappings = [ + { containerPort = local.port_quic_grpc, protocol = "tcp" }, + { containerPort = local.port_quic_grpc, protocol = "udp" }, + { containerPort = local.port_http, protocol = "tcp" }, + ] + + environment = [ + { name = "LORE_ENV", value = "docker" }, + { name = "LORE_CONFIG_PATH", value = "/etc/lore/config" }, + + # Edge stores: replicated immutable (pulls from primary) + remote mutable (proxies to primary) + { name = "LORE__IMMUTABLE_STORE__MODE", value = "replicated" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REMOTE_URL", value = "lore://primary.${local.name}.internal:${local.port_quic_grpc}" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__PERIODIC_CLIENT_REFRESH_SECS", value = "300" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__INITIAL_BACKOFF_MS", value = "100" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__MAX_BACKOFF_MS", value = "5000" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__MAX_ATTEMPTS", value = "10" }, + { name = "LORE__MUTABLE_STORE__MODE", value = "remote" }, + { name = "LORE__MUTABLE_STORE__REMOTE__REMOTE_URL", value = "lore://primary.${local.name}.internal:${local.port_quic_grpc}" }, + { name = "LORE__LOCK_STORE__MODE", value = "local" }, + ] + + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.lore.name + "awslogs-region" = var.region + "awslogs-stream-prefix" = "edge" + } + } + }]) + + tags = local.tags +} + +resource "aws_ecs_service" "edge" { 
+ name = "${local.name}-edge" + cluster = aws_ecs_cluster.this.id + task_definition = aws_ecs_task_definition.edge.arn + desired_count = 1 + launch_type = "FARGATE" + + network_configuration { + subnets = aws_subnet.private[*].id + security_groups = [aws_security_group.lore.id] + assign_public_ip = false + } + + depends_on = [aws_ecs_service.lore] + tags = local.tags } diff --git a/examples/aws/iam.tf b/examples/aws/iam.tf index b976c05..02c655c 100644 --- a/examples/aws/iam.tf +++ b/examples/aws/iam.tf @@ -27,6 +27,7 @@ resource "aws_iam_role_policy" "task_s3" { "s3:GetObject", "s3:PutObject", "s3:DeleteObject", + "s3:DeleteObjectVersion", "s3:ListBucket", "s3:ListBucketVersions", ] diff --git a/examples/aws/main.tf b/examples/aws/main.tf index 7d0122f..b644544 100644 --- a/examples/aws/main.tf +++ b/examples/aws/main.tf @@ -7,8 +7,9 @@ locals { tags = { ManagedBy = "terraform", Project = "lore" } # Ports — match lore-server/config/default.toml - port_quic_grpc = 41337 # QUIC (UDP) + gRPC (TCP) - port_http = 41339 # Health checks, presigned URLs + port_quic_grpc = 41337 # QUIC (UDP) + gRPC (TCP) + port_http = 41339 # Health checks, presigned URLs + port_replication = 41340 # QUIC internal replication (UDP) } data "aws_availability_zones" "available" { diff --git a/examples/aws/network.tf b/examples/aws/network.tf index 9765a73..48b109d 100644 --- a/examples/aws/network.tf +++ b/examples/aws/network.tf @@ -121,6 +121,36 @@ resource "aws_vpc_security_group_ingress_rule" "client_http" { description = "Client HTTP" } +# Internal: QUIC replication (edge → primary on 41340 UDP) +resource "aws_vpc_security_group_ingress_rule" "replication_quic" { + security_group_id = aws_security_group.lore.id + from_port = local.port_replication + to_port = local.port_replication + ip_protocol = "udp" + referenced_security_group_id = aws_security_group.lore.id + description = "QUIC replication between Lore nodes" +} + +# Internal: gRPC (edge → primary on 41337 TCP for remote mutable store) +resource
"aws_vpc_security_group_ingress_rule" "internal_grpc" { + security_group_id = aws_security_group.lore.id + from_port = local.port_quic_grpc + to_port = local.port_quic_grpc + ip_protocol = "tcp" + referenced_security_group_id = aws_security_group.lore.id + description = "gRPC between Lore nodes" +} + +# Internal: QUIC (edge → primary on 41337 UDP for replicated immutable store) +resource "aws_vpc_security_group_ingress_rule" "internal_quic" { + security_group_id = aws_security_group.lore.id + from_port = local.port_quic_grpc + to_port = local.port_quic_grpc + ip_protocol = "udp" + referenced_security_group_id = aws_security_group.lore.id + description = "QUIC between Lore nodes" +} + resource "aws_vpc_security_group_egress_rule" "all" { security_group_id = aws_security_group.lore.id ip_protocol = "-1" diff --git a/examples/aws/outputs.tf b/examples/aws/outputs.tf index f2a6f85..5fec610 100644 --- a/examples/aws/outputs.tf +++ b/examples/aws/outputs.tf @@ -4,10 +4,20 @@ output "cluster_name" { } output "service_name" { - description = "ECS service name" + description = "ECS service name (primary)" value = aws_ecs_service.lore.name } +output "edge_service_name" { + description = "ECS service name (edge)" + value = aws_ecs_service.edge.name +} + +output "primary_dns" { + description = "Cloud Map DNS for primary (used by edge pods)" + value = "primary.${aws_service_discovery_private_dns_namespace.this.name}" +} + output "s3_bucket" { description = "S3 bucket for fragment storage" value = aws_s3_bucket.fragments.id From 1ef073cea4ee70f534c65818212fd0a173c19aaa Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Wed, 17 Jun 2026 23:49:25 +0000 Subject: [PATCH 4/7] Wire TLS certs for inter-node replication - Generate CA + server cert via tls provider (SAN: primary.lore.internal) - Store certs in Secrets Manager, provision via init containers - Primary: enables quic_internal:41340 with cert for edge replication - Edge: trusts primary CA via SSL_CERT_FILE, connects replicated+remote - Both services confirmed running in deployment test
Signed-off-by: Sam Biggins --- examples/aws/README.md | 9 +- examples/aws/compute.tf | 239 ++++++++++++++++++++++++++------------- examples/aws/iam.tf | 13 +++ examples/aws/outputs.tf | 6 + examples/aws/tls.tf | 73 ++++++++++++ examples/aws/versions.tf | 4 + 6 files changed, 262 insertions(+), 82 deletions(-) create mode 100644 examples/aws/tls.tf diff --git a/examples/aws/README.md b/examples/aws/README.md index 8c872d0..ee8ad0b 100644 --- a/examples/aws/README.md +++ b/examples/aws/README.md @@ -7,7 +7,10 @@ Terraform configuration that deploys a Lore server on AWS with durable S3/Dynamo - VPC with public and private subnets (2 AZs) - S3 bucket for fragment storage (immutable store) - 4 DynamoDB tables (fragments, metadata, mutable store, locks) -- ECS Fargate service running the loreserver container +- ECS Fargate primary service with S3/DynamoDB storage +- ECS Fargate edge service with replicated storage (caches from primary) +- Cloud Map private DNS for edge → primary service discovery +- Self-signed TLS CA + server certificate (inter-node trust) - VPC endpoints for S3 and DynamoDB (reduces NAT costs) - CloudWatch log group @@ -88,7 +91,7 @@ This example uses the simplest viable configuration. For production: - **TLS** — mount real certificates and set `LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE` / `PKEY_FILE` (and the same for `GRPC`). See [Server configuration reference](https://epicgames.github.io/lore/reference/lore-server-config/#certificate-block). - **Auth** — configure `LORE__SERVER__AUTH__JWK__ENDPOINT` to validate JWTs. See [Authentication](https://epicgames.github.io/lore/reference/lore-server-config/#authentication). - **Caching** — switch from Fargate to EC2 with NVMe instances and use `LORE__IMMUTABLE_STORE__MODE=composite` for a local cache in front of S3. -- **Replication** — add edge nodes with `LORE__IMMUTABLE_STORE__MODE=replicated` for multi-region. 
See [Topology](https://epicgames.github.io/lore/reference/lore-server-config/#topology-settings). +- **Replication** — add more edge nodes or deploy to other regions. See [Topology](https://epicgames.github.io/lore/reference/lore-server-config/#topology-settings). - **HMAC** — set `LORE__SERVER__HTTP__PRESIGNED_URL_HMAC_KEY` (hex, ≥32 bytes) to enable presigned URLs for direct client-to-S3 transfers. ## Destroy @@ -97,4 +100,4 @@ This example uses the simplest viable configuration. For production: terraform destroy ``` -Teardown takes 2–3 minutes (VPC, NAT gateway deletion). +Teardown includes VPC and NAT gateway deletion. diff --git a/examples/aws/compute.tf b/examples/aws/compute.tf index 4c3eb97..7ae9606 100644 --- a/examples/aws/compute.tf +++ b/examples/aws/compute.tf @@ -1,5 +1,5 @@ # ============================================================================= -# ECS Cluster + Fargate Service +# ECS Cluster + Primary + Edge Services # ============================================================================= resource "aws_ecs_cluster" "this" { @@ -13,6 +13,10 @@ resource "aws_cloudwatch_log_group" "lore" { tags = local.tags } +# ============================================================================= +# Primary — Durable storage (S3 + DynamoDB), serves replication to edge +# ============================================================================= + resource "aws_ecs_task_definition" "lore" { family = local.name requires_compatibilities = ["FARGATE"] @@ -22,50 +26,92 @@ resource "aws_ecs_task_definition" "lore" { execution_role_arn = aws_iam_role.execution.arn task_role_arn = aws_iam_role.task.arn - container_definitions = jsonencode([{ - name = "loreserver" - image = var.container_image - essential = true - - portMappings = [ - { containerPort = local.port_quic_grpc, protocol = "tcp" }, - { containerPort = local.port_quic_grpc, protocol = "udp" }, - { containerPort = local.port_http, protocol = "tcp" }, - ] - - environment = [ - { name = 
"LORE_ENV", value = "docker" }, - { name = "LORE_CONFIG_PATH", value = "/etc/lore/config" }, - - # Storage: S3 + DynamoDB via the aws plugin - { name = "LORE__IMMUTABLE_STORE__MODE", value = "aws" }, - { name = "LORE__MUTABLE_STORE__MODE", value = "aws" }, - { name = "LORE__LOCK_STORE__MODE", value = "aws" }, - - # AWS plugin config — resource names from Terraform - { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__S3_BUCKET", value = aws_s3_bucket.fragments.id }, - { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__DYNAMODB_FRAGMENTS_TABLE", value = aws_dynamodb_table.fragments.name }, - { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__DYNAMODB_METADATA_TABLE", value = aws_dynamodb_table.metadata.name }, - { name = "LORE__PLUGINS__AWS__MUTABLE_STORE__DYNAMODB_TABLE", value = aws_dynamodb_table.mutable.name }, - { name = "LORE__PLUGINS__AWS__LOCK_STORE__DYNAMODB_TABLE", value = aws_dynamodb_table.locks.name }, - ] - - # TLS: The server generates an ephemeral self-signed certificate when no - # certificate is configured. 
For production, mount real certs and set: - # LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE=/certs/cert.pem - # LORE__SERVER__QUIC__CERTIFICATE__PKEY_FILE=/certs/key.pem - # LORE__SERVER__GRPC__CERTIFICATE__CERT_FILE=/certs/cert.pem - # LORE__SERVER__GRPC__CERTIFICATE__PKEY_FILE=/certs/key.pem - - logConfiguration = { - logDriver = "awslogs" - options = { - "awslogs-group" = aws_cloudwatch_log_group.lore.name - "awslogs-region" = var.region - "awslogs-stream-prefix" = "lore" + volume { + name = "certs" + } + + container_definitions = jsonencode([ + # Init container: write TLS certs from secrets to shared volume + { + name = "init-certs" + image = "public.ecr.aws/amazonlinux/amazonlinux:minimal" + essential = false + command = ["sh", "-c", "echo \"$CERT\" > /certs/fullchain.crt && echo \"$KEY\" > /certs/server.key && echo \"$CA\" > /certs/ca.pem"] + + secrets = [ + { name = "CERT", valueFrom = "${aws_secretsmanager_secret.tls.arn}:fullchain::" }, + { name = "KEY", valueFrom = "${aws_secretsmanager_secret.tls.arn}:key::" }, + { name = "CA", valueFrom = "${aws_secretsmanager_secret.tls.arn}:ca::" }, + ] + + mountPoints = [{ sourceVolume = "certs", containerPath = "/certs", readOnly = false }] + + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.lore.name + "awslogs-region" = var.region + "awslogs-stream-prefix" = "init" + } } - } - }]) + }, + # Loreserver primary + { + name = "loreserver" + image = var.container_image + essential = true + + dependsOn = [{ containerName = "init-certs", condition = "SUCCESS" }] + + portMappings = [ + { containerPort = local.port_quic_grpc, protocol = "tcp" }, + { containerPort = local.port_quic_grpc, protocol = "udp" }, + { containerPort = local.port_http, protocol = "tcp" }, + { containerPort = local.port_replication, protocol = "udp" }, + ] + + mountPoints = [{ sourceVolume = "certs", containerPath = "/certs", readOnly = true }] + + environment = [ + { name = "LORE_ENV", value = 
"docker" }, + { name = "LORE_CONFIG_PATH", value = "/etc/lore/config" }, + + # TLS for all endpoints + { name = "LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE", value = "/certs/fullchain.crt" }, + { name = "LORE__SERVER__QUIC__CERTIFICATE__PKEY_FILE", value = "/certs/server.key" }, + { name = "LORE__SERVER__GRPC__CERTIFICATE__CERT_FILE", value = "/certs/fullchain.crt" }, + { name = "LORE__SERVER__GRPC__CERTIFICATE__PKEY_FILE", value = "/certs/server.key" }, + { name = "LORE__SERVER__GRPC__VERIFY_CLIENT_CERTS", value = "false" }, + + # Enable internal QUIC for edge pod replication + { name = "LORE__SERVER__QUIC_INTERNAL__ENABLED", value = "true" }, + { name = "LORE__SERVER__QUIC_INTERNAL__CERTIFICATE__CERT_FILE", value = "/certs/fullchain.crt" }, + { name = "LORE__SERVER__QUIC_INTERNAL__CERTIFICATE__PKEY_FILE", value = "/certs/server.key" }, + { name = "LORE__SERVER__QUIC_INTERNAL__VERIFY_CLIENT_CERTS", value = "false" }, + + # Storage: S3 + DynamoDB via the aws plugin + { name = "LORE__IMMUTABLE_STORE__MODE", value = "aws" }, + { name = "LORE__MUTABLE_STORE__MODE", value = "aws" }, + { name = "LORE__LOCK_STORE__MODE", value = "aws" }, + + # AWS plugin config + { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__S3_BUCKET", value = aws_s3_bucket.fragments.id }, + { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__DYNAMODB_FRAGMENTS_TABLE", value = aws_dynamodb_table.fragments.name }, + { name = "LORE__PLUGINS__AWS__IMMUTABLE_STORE__DYNAMODB_METADATA_TABLE", value = aws_dynamodb_table.metadata.name }, + { name = "LORE__PLUGINS__AWS__MUTABLE_STORE__DYNAMODB_TABLE", value = aws_dynamodb_table.mutable.name }, + { name = "LORE__PLUGINS__AWS__LOCK_STORE__DYNAMODB_TABLE", value = aws_dynamodb_table.locks.name }, + ] + + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.lore.name + "awslogs-region" = var.region + "awslogs-stream-prefix" = "lore" + } + } + }, + ]) tags = local.tags } @@ -132,42 +178,77 @@ resource 
"aws_ecs_task_definition" "edge" { execution_role_arn = aws_iam_role.execution.arn task_role_arn = aws_iam_role.task.arn - container_definitions = jsonencode([{ - name = "loreserver" - image = var.container_image - essential = true - - portMappings = [ - { containerPort = local.port_quic_grpc, protocol = "tcp" }, - { containerPort = local.port_quic_grpc, protocol = "udp" }, - { containerPort = local.port_http, protocol = "tcp" }, - ] - - environment = [ - { name = "LORE_ENV", value = "docker" }, - { name = "LORE_CONFIG_PATH", value = "/etc/lore/config" }, - - # Edge stores: replicated immutable (pulls from primary) + remote mutable (proxies to primary) - { name = "LORE__IMMUTABLE_STORE__MODE", value = "replicated" }, - { name = "LORE__IMMUTABLE_STORE__REPLICATED__REMOTE_URL", value = "lore://primary.${local.name}.internal:${local.port_quic_grpc}" }, - { name = "LORE__IMMUTABLE_STORE__REPLICATED__PERIODIC_CLIENT_REFRESH_SECS", value = "300" }, - { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__INITIAL_BACKOFF_MS", value = "100" }, - { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__MAX_BACKOFF_MS", value = "5000" }, - { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__MAX_ATTEMPTS", value = "10" }, - { name = "LORE__MUTABLE_STORE__MODE", value = "remote" }, - { name = "LORE__MUTABLE_STORE__REMOTE__REMOTE_URL", value = "lore://primary.${local.name}.internal:${local.port_quic_grpc}" }, - { name = "LORE__LOCK_STORE__MODE", value = "local" }, - ] - - logConfiguration = { - logDriver = "awslogs" - options = { - "awslogs-group" = aws_cloudwatch_log_group.lore.name - "awslogs-region" = var.region - "awslogs-stream-prefix" = "edge" + volume { + name = "certs" + } + + container_definitions = jsonencode([ + # Init container: write CA cert so edge trusts primary + { + name = "init-certs" + image = "public.ecr.aws/amazonlinux/amazonlinux:minimal" + essential = false + command = ["sh", "-c", "echo \"$CA\" > /certs/ca.pem"] + + secrets = [ + 
{ name = "CA", valueFrom = "${aws_secretsmanager_secret.tls.arn}:ca::" }, + ] + + mountPoints = [{ sourceVolume = "certs", containerPath = "/certs", readOnly = false }] + + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.lore.name + "awslogs-region" = var.region + "awslogs-stream-prefix" = "edge-init" + } } - } - }]) + }, + # Loreserver edge + { + name = "loreserver" + image = var.container_image + essential = true + + dependsOn = [{ containerName = "init-certs", condition = "SUCCESS" }] + + portMappings = [ + { containerPort = local.port_quic_grpc, protocol = "tcp" }, + { containerPort = local.port_quic_grpc, protocol = "udp" }, + { containerPort = local.port_http, protocol = "tcp" }, + ] + + mountPoints = [{ sourceVolume = "certs", containerPath = "/certs", readOnly = true }] + + environment = [ + { name = "LORE_ENV", value = "docker" }, + { name = "LORE_CONFIG_PATH", value = "/etc/lore/config" }, + # Trust the primary's CA for QUIC replication connection + { name = "SSL_CERT_FILE", value = "/certs/ca.pem" }, + + # Edge stores: replicated immutable (QUIC to primary:41340) + remote mutable (gRPC to primary:41337) + { name = "LORE__IMMUTABLE_STORE__MODE", value = "replicated" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REMOTE_URL", value = "lore://primary.${local.name}.internal:${local.port_replication}" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__PERIODIC_CLIENT_REFRESH_SECS", value = "300" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__INITIAL_BACKOFF_MS", value = "100" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__MAX_BACKOFF_MS", value = "5000" }, + { name = "LORE__IMMUTABLE_STORE__REPLICATED__REGENERATE_RETRY__MAX_ATTEMPTS", value = "10" }, + { name = "LORE__MUTABLE_STORE__MODE", value = "remote" }, + { name = "LORE__MUTABLE_STORE__REMOTE__REMOTE_URL", value = "lores://primary.${local.name}.internal:${local.port_quic_grpc}" }, + { name = 
"LORE__LOCK_STORE__MODE", value = "local" }, + ] + + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.lore.name + "awslogs-region" = var.region + "awslogs-stream-prefix" = "edge" + } + } + }, + ]) tags = local.tags } diff --git a/examples/aws/iam.tf b/examples/aws/iam.tf index 02c655c..de60ca1 100644 --- a/examples/aws/iam.tf +++ b/examples/aws/iam.tf @@ -84,3 +84,16 @@ resource "aws_iam_role_policy_attachment" "execution_ecr" { role = aws_iam_role.execution.name policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" } + +resource "aws_iam_role_policy" "execution_secrets" { + name_prefix = "secrets-" + role = aws_iam_role.execution.id + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = ["secretsmanager:GetSecretValue"] + Resource = [aws_secretsmanager_secret.tls.arn] + }] + }) +} diff --git a/examples/aws/outputs.tf b/examples/aws/outputs.tf index 5fec610..6aaae18 100644 --- a/examples/aws/outputs.tf +++ b/examples/aws/outputs.tf @@ -27,3 +27,9 @@ output "log_group" { description = "CloudWatch log group" value = aws_cloudwatch_log_group.lore.name } + +output "ca_certificate_pem" { + description = "CA certificate — clients need this to trust the server's TLS cert" + value = local.ca_pem + sensitive = true +} diff --git a/examples/aws/tls.tf b/examples/aws/tls.tf new file mode 100644 index 0000000..03eedc3 --- /dev/null +++ b/examples/aws/tls.tf @@ -0,0 +1,73 @@ +# ============================================================================= +# TLS — CA + server certificate for QUIC and gRPC between nodes +# +# The public QUIC endpoint generates an ephemeral cert if none is configured, +# but the internal replication endpoint (quic_internal) requires an explicit +# certificate. We generate a CA + server cert here so both primary and edge +# can establish trusted QUIC connections. 
+# ============================================================================= + +resource "tls_private_key" "ca" { + algorithm = "ECDSA" + ecdsa_curve = "P384" +} + +resource "tls_self_signed_cert" "ca" { + private_key_pem = tls_private_key.ca.private_key_pem + + subject { + common_name = "${local.name}-ca" + organization = "Lore Example" + } + + validity_period_hours = 8760 + is_ca_certificate = true + allowed_uses = ["cert_signing", "crl_signing"] +} + +resource "tls_private_key" "server" { + algorithm = "ECDSA" + ecdsa_curve = "P384" +} + +resource "tls_cert_request" "server" { + private_key_pem = tls_private_key.server.private_key_pem + + subject { + common_name = "lore-server" + organization = "Lore Example" + } + + # Cloud Map DNS name used by edge pods to reach primary + dns_names = ["primary.${local.name}.internal", "localhost"] +} + +resource "tls_locally_signed_cert" "server" { + cert_request_pem = tls_cert_request.server.cert_request_pem + ca_private_key_pem = tls_private_key.ca.private_key_pem + ca_cert_pem = tls_self_signed_cert.ca.cert_pem + + validity_period_hours = 8760 + allowed_uses = ["digital_signature", "key_encipherment", "server_auth"] +} + +# Fullchain = server cert + CA cert +locals { + fullchain_pem = "${tls_locally_signed_cert.server.cert_pem}${tls_self_signed_cert.ca.cert_pem}" + server_key = tls_private_key.server.private_key_pem + ca_pem = tls_self_signed_cert.ca.cert_pem +} + +resource "aws_secretsmanager_secret" "tls" { + name_prefix = "${local.name}-tls-" + tags = local.tags +} + +resource "aws_secretsmanager_secret_version" "tls" { + secret_id = aws_secretsmanager_secret.tls.id + secret_string = jsonencode({ + fullchain = local.fullchain_pem + key = local.server_key + ca = local.ca_pem + }) +} diff --git a/examples/aws/versions.tf b/examples/aws/versions.tf index 00bd111..bbb76b5 100644 --- a/examples/aws/versions.tf +++ b/examples/aws/versions.tf @@ -6,5 +6,9 @@ terraform { source = "hashicorp/aws" version = ">= 5.0" } + tls = 
{ + source = "hashicorp/tls" + version = ">= 4.0" + } } } From ca5cdf3ff43a7d7fc49bed95f0c66d60a80ac806 Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Thu, 18 Jun 2026 00:15:51 +0000 Subject: [PATCH 5/7] Fix README: point clients at edge, add CA export, note region Signed-off-by: Sam Biggins --- examples/aws/README.md | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/aws/README.md b/examples/aws/README.md index ee8ad0b..cb2d28e 100644 --- a/examples/aws/README.md +++ b/examples/aws/README.md @@ -2,6 +2,8 @@ Terraform configuration that deploys a Lore server on AWS with durable S3/DynamoDB storage using ECS Fargate. +> Region is configurable via `var.region` (default: `us-west-2`). + ## What this creates - VPC with public and private subnets (2 AZs) @@ -46,13 +48,21 @@ terraform apply ## Connect -The ECS service runs in private subnets. You must connect from within the VPC -(e.g., an EC2 instance, VPN, AWS Client VPN, or SSM port-forwarding session). +The ECS services run in private subnets. Connect from within the VPC +(e.g., an EC2 instance, VPN, or AWS Client VPN). + +Export the CA certificate so the client trusts the server's QUIC endpoint: + +```sh +terraform output -raw ca_certificate_pem > lore-ca.pem +export SSL_CERT_FILE=lore-ca.pem +``` -Get the task IP: +Clients connect to the **edge** service (it replicates from the primary +automatically). 
Get the edge task IP: ```sh -TASK_ARN=$(aws ecs list-tasks --cluster lore-cluster --service-name lore --query 'taskArns[0]' --output text) +TASK_ARN=$(aws ecs list-tasks --cluster lore-cluster --service-name lore-edge --query 'taskArns[0]' --output text) TASK_IP=$(aws ecs describe-tasks --cluster lore-cluster --tasks "$TASK_ARN" \ --query 'tasks[0].attachments[0].details[?name==`privateIPv4Address`].value' --output text) echo "$TASK_IP" @@ -64,9 +74,9 @@ From a host inside the VPC: lore clone lore://${TASK_IP}:41337/my-repo ``` -The server generates an ephemeral self-signed certificate on startup. Use -`lore://` (plain gRPC control plane — QUIC data path still uses TLS) or -configure real certificates and use `lores://` (see Customize below). +> `lore://` uses QUIC (TLS) for data and plain gRPC for the control plane. +> The edge pod's gRPC is not TLS-configured, so `lore://` works directly. +> For `lores://` (gRPC+TLS), configure certificates on the edge pod (see Customize). ## Verify From 96edac2aec0e2785920335612535e2f3264ce2fe Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Thu, 18 Jun 2026 00:17:52 +0000 Subject: [PATCH 6/7] Rewrite README for clarity: quick start flow, tables, remove internals Signed-off-by: Sam Biggins --- examples/aws/README.md | 113 ++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 59 deletions(-) diff --git a/examples/aws/README.md b/examples/aws/README.md index cb2d28e..28d8af3 100644 --- a/examples/aws/README.md +++ b/examples/aws/README.md @@ -1,108 +1,99 @@ # Lore on AWS -Terraform configuration that deploys a Lore server on AWS with durable S3/DynamoDB storage using ECS Fargate. +Deploy a Lore server on AWS with durable S3/DynamoDB storage and an edge node for client access. > Region is configurable via `var.region` (default: `us-west-2`). 
-## What this creates +## Quick start -- VPC with public and private subnets (2 AZs) -- S3 bucket for fragment storage (immutable store) -- 4 DynamoDB tables (fragments, metadata, mutable store, locks) -- ECS Fargate primary service with S3/DynamoDB storage -- ECS Fargate edge service with replicated storage (caches from primary) -- Cloud Map private DNS for edge → primary service discovery -- Self-signed TLS CA + server certificate (inter-node trust) -- VPC endpoints for S3 and DynamoDB (reduces NAT costs) -- CloudWatch log group +### 1. Build and push the container image -## Prerequisites - -- [Terraform](https://developer.hashicorp.com/terraform/install) >= 1.5 -- AWS credentials configured (`aws configure` or environment variables) -- A `loreserver` container image in ECR — build from the repo root: +From the Lore repo root: ```sh docker build -f lore-server/Dockerfile -t loreserver . - -aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin .dkr.ecr.us-west-2.amazonaws.com -aws ecr create-repository --repository-name loreserver --region us-west-2 -docker tag loreserver:latest .dkr.ecr.us-west-2.amazonaws.com/loreserver:latest -docker push .dkr.ecr.us-west-2.amazonaws.com/loreserver:latest ``` -The Dockerfile builds the `loreserver` binary from the workspace, which includes -the `lore-aws` crate. The server's `main()` calls `register_all_plugins()` at -startup, registering the AWS (S3 + DynamoDB) and HashiCorp (Consul) plugins -automatically. No custom binary or fork is needed. +Push to ECR (replace `<account-id>` and `<region>`): + +```sh +aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <account-id>.dkr.ecr.<region>.amazonaws.com +aws ecr create-repository --repository-name loreserver --region <region> +docker tag loreserver:latest <account-id>.dkr.ecr.<region>.amazonaws.com/loreserver:latest +docker push <account-id>.dkr.ecr.<region>.amazonaws.com/loreserver:latest +``` -## Deploy +### 2.
Deploy ```sh cd examples/aws cp terraform.tfvars.example terraform.tfvars -# Edit terraform.tfvars — set your container_image URI and allowed_cidrs +``` + +Edit `terraform.tfvars`: + +```hcl +region = "us-west-2" +container_image = "<account-id>.dkr.ecr.us-west-2.amazonaws.com/loreserver:latest" +allowed_cidrs = ["10.0.0.0/8"] # Your VPC or VPN CIDR +``` + +```sh terraform init terraform apply ``` -## Connect +### 3. Connect -The ECS services run in private subnets. Connect from within the VPC -(e.g., an EC2 instance, VPN, or AWS Client VPN). - -Export the CA certificate so the client trusts the server's QUIC endpoint: +The services run in private subnets — connect from within the VPC (EC2 instance, VPN, or Client VPN). ```sh +# Export the CA so the client trusts the server terraform output -raw ca_certificate_pem > lore-ca.pem export SSL_CERT_FILE=lore-ca.pem -``` -Clients connect to the **edge** service (it replicates from the primary -automatically). Get the edge task IP: - -```sh +# Get the edge node IP TASK_ARN=$(aws ecs list-tasks --cluster lore-cluster --service-name lore-edge --query 'taskArns[0]' --output text) TASK_IP=$(aws ecs describe-tasks --cluster lore-cluster --tasks "$TASK_ARN" \ --query 'tasks[0].attachments[0].details[?name==`privateIPv4Address`].value' --output text) -echo "$TASK_IP" -``` - -From a host inside the VPC: -```sh +# Clone a repository lore clone lore://${TASK_IP}:41337/my-repo ``` -> `lore://` uses QUIC (TLS) for data and plain gRPC for the control plane. -> The edge pod's gRPC is not TLS-configured, so `lore://` works directly. -> For `lores://` (gRPC+TLS), configure certificates on the edge pod (see Customize).
+## What gets deployed -## Verify +| Component | Purpose | +|-----------|---------| +| Primary (ECS Fargate) | Stores fragments in S3 and metadata in DynamoDB | +| Edge (ECS Fargate) | Client-facing node that replicates from primary | +| Cloud Map DNS | Edge → primary service discovery | +| VPC | Private subnets, NAT, S3/DynamoDB gateway endpoints | +| TLS CA | Self-signed; establishes trust between nodes | -Check the service is running: +## Verify ```sh -aws ecs describe-services --cluster lore-cluster --services lore \ - --query 'services[0].{status:status,running:runningCount}' +aws ecs describe-services --cluster lore-cluster --services lore lore-edge \ + --query 'services[].{name:serviceName,running:runningCount}' ``` -Check server logs: - ```sh aws logs tail /ecs/lore --since 5m ``` ## Customize -This example uses the simplest viable configuration. For production: +| Need | What to change | +|------|----------------| +| External access | Add an NLB or AWS Client VPN | +| gRPC TLS for clients | Configure edge certificates, use `lores://` | +| Authentication | Set `LORE__SERVER__AUTH__JWK__ENDPOINT` ([docs](https://epicgames.github.io/lore/reference/lore-server-config/#authentication)) | +| NVMe caching | Switch to EC2, use `composite` store mode | +| More edge nodes | Duplicate the edge service definition | +| Presigned URLs | Set `LORE__SERVER__HTTP__PRESIGNED_URL_HMAC_KEY` (hex, ≥32 bytes) | -- **Ingress** — add an NLB, AWS Client VPN, or bastion host for access from outside the VPC. -- **TLS** — mount real certificates and set `LORE__SERVER__QUIC__CERTIFICATE__CERT_FILE` / `PKEY_FILE` (and the same for `GRPC`). See [Server configuration reference](https://epicgames.github.io/lore/reference/lore-server-config/#certificate-block). -- **Auth** — configure `LORE__SERVER__AUTH__JWK__ENDPOINT` to validate JWTs. See [Authentication](https://epicgames.github.io/lore/reference/lore-server-config/#authentication). 
-- **Caching** — switch from Fargate to EC2 with NVMe instances and use `LORE__IMMUTABLE_STORE__MODE=composite` for a local cache in front of S3. -- **Replication** — add more edge nodes or deploy to other regions. See [Topology](https://epicgames.github.io/lore/reference/lore-server-config/#topology-settings). -- **HMAC** — set `LORE__SERVER__HTTP__PRESIGNED_URL_HMAC_KEY` (hex, ≥32 bytes) to enable presigned URLs for direct client-to-S3 transfers. +Full server configuration: [Lore Server config reference](https://epicgames.github.io/lore/reference/lore-server-config/) ## Destroy @@ -110,4 +101,8 @@ This example uses the simplest viable configuration. For production: terraform destroy ``` -Teardown includes VPC and NAT gateway deletion. +## Prerequisites + +- [Terraform](https://developer.hashicorp.com/terraform/install) >= 1.5 +- AWS credentials with VPC, ECS, S3, DynamoDB, IAM, Secrets Manager, Cloud Map permissions +- Docker (to build the container image) From 89853341a15faa978e3cdb6fe0235c991db9bad4 Mon Sep 17 00:00:00 2001 From: Sam Biggins Date: Thu, 18 Jun 2026 15:14:03 +0000 Subject: [PATCH 7/7] Add integration test (terraform test with mock providers) Validates resource schemas, variable wiring, and service configuration without AWS credentials. Catches breakage from Terraform/provider version upgrades or changes to the Lore AWS plugin config contract. Run: cd examples/aws && terraform init && terraform test Signed-off-by: Sam Biggins --- examples/aws/tests/plan.tftest.hcl | 88 ++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 examples/aws/tests/plan.tftest.hcl diff --git a/examples/aws/tests/plan.tftest.hcl b/examples/aws/tests/plan.tftest.hcl new file mode 100644 index 0000000..a08c967 --- /dev/null +++ b/examples/aws/tests/plan.tftest.hcl @@ -0,0 +1,88 @@ +# Plan-level validation — runs without AWS credentials. +# Catches stale resource schemas, broken variable wiring, and +# Terraform/provider version incompatibilities. 
+# +# Run: terraform test (needs Terraform >= 1.7 for mock_provider/override_data; the README's ">= 1.5" covers deployment only) + +mock_provider "aws" {} +mock_provider "tls" {} + +override_data { + target = data.aws_availability_zones.available + values = { + names = ["us-west-2a", "us-west-2b"] + } +} + +variables { + container_image = "123456789012.dkr.ecr.us-west-2.amazonaws.com/loreserver:latest" + allowed_cidrs = ["10.0.0.0/8"] + region = "us-west-2" +} + +run "primary_service_configured" { + command = plan + + assert { + condition = aws_ecs_cluster.this.name == "lore-cluster" + error_message = "Cluster name should be 'lore-cluster'" + } + + assert { + condition = aws_ecs_service.lore.name == "lore" + error_message = "Primary service name should be 'lore'" + } + + assert { + condition = aws_ecs_service.edge.name == "lore-edge" + error_message = "Edge service name should be 'lore-edge'" + } +} + +run "storage_schemas_correct" { + command = plan + + assert { + condition = aws_dynamodb_table.fragments.hash_key == "hash" + error_message = "Fragments table hash key must be 'hash'" + } + + assert { + condition = aws_dynamodb_table.fragments.range_key == "repository_context" + error_message = "Fragments table range key must be 'repository_context'" + } + + assert { + condition = aws_dynamodb_table.metadata.hash_key == "hash" + error_message = "Metadata table hash key must be 'hash'" + } + + assert { + condition = aws_dynamodb_table.mutable.hash_key == "repository_id" + error_message = "Mutable table hash key must be 'repository_id'" + } + + assert { + condition = aws_dynamodb_table.locks.hash_key == "hash" + error_message = "Locks table hash key must be 'hash'" + } + + assert { + condition = aws_dynamodb_table.locks.range_key == "repositoryBranch" + error_message = "Locks table range key must be 'repositoryBranch'" + } +} + +run "service_discovery_configured" { + command = plan + + assert { + condition = aws_service_discovery_private_dns_namespace.this.name == "lore.internal" + error_message = "Cloud Map namespace should be 'lore.internal'" + } + + assert { +
condition = aws_service_discovery_service.lore.name == "primary" + error_message = "Cloud Map service name should be 'primary'" + } +}