# template.yaml - 282 lines (268 loc), 8.53 KB
# AWS SAM template: the Transform line below enables the AWS::Serverless::*
# resource types used in this file.
AWSTemplateFormatVersion: "2010-09-09"
Transform: "AWS::Serverless-2016-10-31"
Description: "OmniCloud Agent - Claude Agent SDK based Telegram Bot"
# Deploy-time inputs. Every parameter is NoEcho, which masks the value in
# CloudFormation console/API output. NoEcho does NOT mask a value once it is
# copied into a Lambda environment variable, which is where all of these end up.
Parameters:
  TelegramBotToken:
    Type: String
    Description: 'Telegram bot token, exposed to the producer and consumer functions as TELEGRAM_BOT_TOKEN'
    NoEcho: true

  BedrockAccessKeyId:
    Type: String
    Description: 'Access key ID passed to the agent server as BEDROCK_ACCESS_KEY_ID'
    NoEcho: true

  BedrockSecretAccessKey:
    Type: String
    Description: 'Secret access key passed to the agent server as BEDROCK_SECRET_ACCESS_KEY'
    NoEcho: true

  # NOTE(review): the default is a publicly known placeholder and the agent
  # server Function URL uses AuthType NONE, so always override this value.
  # The default is kept only so existing deployments that omit the parameter
  # keep working.
  AuthToken:
    Type: String
    Default: 'change-me-in-production'
    Description: 'Shared token passed to all three functions as SDK_CLIENT_AUTH_TOKEN; override the placeholder default'
    NoEcho: true

  BedrockHaikuModelArn:
    Type: String
    Default: ''
    Description: '(Optional) ARN for Bedrock Haiku model'
    NoEcho: true

  BedrockSonnetModelArn:
    Type: String
    Default: ''
    Description: '(Optional) ARN for Bedrock Sonnet model'
    NoEcho: true

  BedrockOpusModelArn:
    Type: String
    Default: ''
    Description: '(Optional) ARN for Bedrock Opus 4.5 model'
    NoEcho: true

  TelegramWebhookSecret:
    Type: String
    Default: ''
    Description: '(Optional) Secret token for Telegram webhook verification'
    NoEcho: true
# Defaults applied to every AWS::Serverless::Function in this template;
# individual functions override Timeout / MemorySize where they need to.
Globals:
  Function:
    Architectures: [arm64]
    MemorySize: 512
    Timeout: 900
Resources:
  # S3 bucket for session files; objects expire after 30 days.
  SessionBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub 'omnicloud-agent-sessions-${AWS::AccountId}'
      LifecycleConfiguration:
        Rules:
          - Id: ExpireOldSessions
            Status: Enabled
            ExpirationInDays: 30

  # DynamoDB table for session mapping, keyed by session_key.
  # TTL is enabled on the `ttl` attribute.
  SessionTable:
    Type: AWS::DynamoDB::Table
    Properties:
      TableName: omnicloud-agent-sessions
      BillingMode: PAY_PER_REQUEST
      AttributeDefinitions:
        - AttributeName: session_key
          AttributeType: S
      KeySchema:
        - AttributeName: session_key
          KeyType: HASH
      TimeToLiveSpecification:
        AttributeName: ttl
        Enabled: true

  # SQS task queue (FIFO for session message ordering).
  TaskQueue:
    Type: AWS::SQS::Queue
    Properties:
      QueueName: !Sub '${AWS::StackName}-TaskQueue.fifo'
      FifoQueue: true
      # Content-based deduplication is off, so senders must supply an explicit
      # MessageDeduplicationId with each message.
      ContentBasedDeduplication: false
      DeduplicationScope: messageGroup
      FifoThroughputLimit: perMessageGroupId
      # 15 minutes = consumer Lambda timeout.
      # NOTE(review): AWS recommends a visibility timeout of at least six times
      # the function timeout for SQS event sources. Left unchanged because a
      # longer timeout also delays retries and keeps the message group blocked
      # after a failure - confirm the intended trade-off.
      VisibilityTimeout: 900
      MessageRetentionPeriod: 1209600  # 14 days
      RedrivePolicy:
        deadLetterTargetArn: !GetAtt DLQueue.Arn
        maxReceiveCount: 3  # Retry 3 times then move to DLQ

  # Dead letter queue (FIFO to match TaskQueue).
  DLQueue:
    Type: AWS::SQS::Queue
    Properties:
      QueueName: !Sub '${AWS::StackName}-DLQueue.fifo'
      FifoQueue: true
      ContentBasedDeduplication: true
      MessageRetentionPeriod: 1209600  # 14 days

  # SNS topic for alarm notifications.
  # No subscriptions are declared in this template; subscribe an endpoint
  # after deployment (the topic ARN is exported in Outputs) to receive alerts.
  AlarmTopic:
    Type: AWS::SNS::Topic
    Properties:
      TopicName: !Sub '${AWS::StackName}-Alarms'
      DisplayName: OmniCloud Agent Alarms

  # Agent server Lambda (container image built from ./agent-sdk-server).
  AgentServerFunction:
    Type: AWS::Serverless::Function
    Metadata:
      DockerTag: agent-server
      DockerContext: ./agent-sdk-server
      Dockerfile: Dockerfile
    Properties:
      PackageType: Image
      MemorySize: 2048
      EphemeralStorage:
        Size: 2048
      Environment:
        Variables:
          # App config
          SESSION_BUCKET: !Ref SessionBucket
          SESSION_TABLE: !Ref SessionTable
          PROJECT_PATH: '-tmp-workspace'
          SDK_CLIENT_AUTH_TOKEN: !Ref AuthToken
          # Bedrock credentials (runtime injection)
          CLAUDE_CODE_USE_BEDROCK: '1'
          BEDROCK_ACCESS_KEY_ID: !Ref BedrockAccessKeyId
          BEDROCK_SECRET_ACCESS_KEY: !Ref BedrockSecretAccessKey
          # Claude Code config (static)
          CLAUDE_CONFIG_DIR: '/tmp/.claude-code'
          AWS_SHARED_CREDENTIALS_FILE: '/tmp/.aws/credentials'
          DISABLE_AUTOUPDATER: '1'
          DISABLE_TELEMETRY: '1'
          # Bedrock model ARNs
          ANTHROPIC_DEFAULT_HAIKU_MODEL: !Ref BedrockHaikuModelArn
          ANTHROPIC_DEFAULT_SONNET_MODEL: !Ref BedrockSonnetModelArn
          ANTHROPIC_DEFAULT_OPUS_MODEL: !Ref BedrockOpusModelArn
      Policies:
        - S3CrudPolicy:
            BucketName: !Ref SessionBucket
        - DynamoDBCrudPolicy:
            TableName: !Ref SessionTable
      # NOTE(review): AuthType NONE makes the Function URL publicly invocable.
      # Presumably the container checks SDK_CLIENT_AUTH_TOKEN on each request -
      # confirm, and make sure the AuthToken parameter is overridden.
      FunctionUrlConfig:
        AuthType: NONE

  # SDK client Lambda - producer (writes to SQS, returns 200 immediately).
  SdkClientFunction:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: ./agent-sdk-client
      Handler: handler.lambda_handler
      Runtime: python3.12
      # Producer should complete quickly (<1s for message validation + SQS write).
      Timeout: 10
      Environment:
        Variables:
          TELEGRAM_BOT_TOKEN: !Ref TelegramBotToken
          AGENT_SERVER_URL: !GetAtt AgentServerFunctionUrl.FunctionUrl
          SDK_CLIENT_AUTH_TOKEN: !Ref AuthToken
          QUEUE_URL: !Ref TaskQueue
          TELEGRAM_WEBHOOK_SECRET: !Ref TelegramWebhookSecret
          SESSION_TABLE: !Ref SessionTable
          SESSION_BUCKET: !Ref SessionBucket
      Policies:
        - SQSSendMessagePolicy:
            QueueName: !GetAtt TaskQueue.QueueName
        # PutMetricData does not support resource-level permissions, hence '*'.
        - Statement:
            - Effect: Allow
              Action:
                - cloudwatch:PutMetricData
              Resource: '*'
        - DynamoDBReadPolicy:
            TableName: !Ref SessionTable
        - S3ReadPolicy:
            BucketName: !Ref SessionBucket
      Events:
        Webhook:
          Type: HttpApi
          Properties:
            Path: /webhook
            Method: POST

  # Consumer Lambda - processes messages from SQS.
  ConsumerFunction:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: ./agent-sdk-client
      Handler: consumer.lambda_handler
      Runtime: python3.12
      Timeout: 900  # 15 minutes for long Agent Server calls
      Environment:
        Variables:
          TELEGRAM_BOT_TOKEN: !Ref TelegramBotToken
          AGENT_SERVER_URL: !GetAtt AgentServerFunctionUrl.FunctionUrl
          SDK_CLIENT_AUTH_TOKEN: !Ref AuthToken
          QUEUE_URL: !Ref TaskQueue
          SESSION_TABLE: !Ref SessionTable
      Policies:
        - SQSPollerPolicy:
            QueueName: !GetAtt TaskQueue.QueueName
        - DynamoDBCrudPolicy:
            TableName: !Ref SessionTable
      Events:
        SQSEvent:
          Type: SQS
          Properties:
            Queue: !GetAtt TaskQueue.Arn
            BatchSize: 1  # Process one message at a time
            MaximumBatchingWindowInSeconds: 0  # Process immediately

  # DLQ alarm - alert when messages land in the dead letter queue.
  DLQAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: !Sub '${AWS::StackName}-DLQ-Messages'
      AlarmDescription: Alert when messages land in Dead Letter Queue
      MetricName: ApproximateNumberOfMessagesVisible
      Namespace: AWS/SQS
      # The metric is a gauge, so take the peak over the period rather than
      # adding samples together. With Threshold 1 this fires under the same
      # condition as Sum did (any sample >= 1).
      Statistic: Maximum
      Period: 300
      EvaluationPeriods: 1
      Threshold: 1
      ComparisonOperator: GreaterThanOrEqualToThreshold
      AlarmActions:
        - !Ref AlarmTopic
      Dimensions:
        - Name: QueueName
          Value: !GetAtt DLQueue.QueueName
      # SQS only publishes metrics for active queues, so an idle, empty DLQ
      # emits no data points. Treat that as healthy instead of leaving the
      # alarm in INSUFFICIENT_DATA.
      TreatMissingData: notBreaching

  # Producer SQS send error alarm (custom metric, published without dimensions).
  ProducerSQSErrorAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: !Sub '${AWS::StackName}-Producer-SQS-Errors'
      AlarmDescription: Alert when Producer fails to send messages to SQS
      MetricName: SQSError.QueueNotFound
      Namespace: OmniCloudAgent/Producer
      Statistic: Sum
      Period: 60
      EvaluationPeriods: 1
      Threshold: 1
      ComparisonOperator: GreaterThanOrEqualToThreshold
      AlarmActions:
        - !Ref AlarmTopic
      TreatMissingData: notBreaching

  # Producer Lambda error alarm.
  ProducerLambdaErrorAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: !Sub '${AWS::StackName}-Producer-Lambda-Errors'
      AlarmDescription: Alert on Producer Lambda exceptions
      MetricName: Errors
      Namespace: AWS/Lambda
      Statistic: Sum
      Period: 60
      EvaluationPeriods: 1
      Threshold: 1
      ComparisonOperator: GreaterThanOrEqualToThreshold
      AlarmActions:
        - !Ref AlarmTopic
      Dimensions:
        - Name: FunctionName
          Value: !Ref SdkClientFunction
      # Lambda emits no Errors data points while the function is not invoked.
      # Treat those periods as healthy, matching ProducerSQSErrorAlarm.
      TreatMissingData: notBreaching
# Values surfaced after deployment.
Outputs:
  # Public endpoint of the implicit SAM HTTP API created by the producer's
  # HttpApi event. AWS::URLSuffix resolves to amazonaws.com in standard
  # regions and to the correct domain in other partitions (e.g. aws-cn).
  WebhookUrl:
    Description: Telegram Webhook URL
    Value: !Sub 'https://${ServerlessHttpApi}.execute-api.${AWS::Region}.${AWS::URLSuffix}/webhook'

  AgentServerUrl:
    Description: Agent Server Function URL
    Value: !GetAtt AgentServerFunctionUrl.FunctionUrl

  # Ref on an SQS queue returns the queue URL.
  TaskQueueUrl:
    Description: Task Queue URL
    Value: !Ref TaskQueue

  DLQueueUrl:
    Description: Dead Letter Queue URL
    Value: !Ref DLQueue

  # Ref on an SNS topic returns the topic ARN; exported for other stacks.
  AlarmTopicArn:
    Description: SNS Topic for CloudWatch Alarm notifications
    Value: !Ref AlarmTopic
    Export:
      Name: !Sub '${AWS::StackName}-AlarmTopic'