Skip to content

Commit a19cfd3

Browse files
committed
WIP file set annotation route
1 parent 4a5b705 commit a19cfd3

7 files changed

Lines changed: 332 additions & 1 deletion

File tree

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
const { wrap } = require("./middleware");
2+
const { getFileSet } = require("../api/opensearch");
3+
const { appInfo } = require("../environment");
4+
const opensearchResponse = require("../api/response/opensearch");
5+
6+
/**
7+
* Returns annotations for a FileSet
8+
*/
9+
exports.handler = wrap(async (event) => {
10+
const id = event.pathParameters.id;
11+
const allowPrivate =
12+
event.userToken.isSuperUser() || event.userToken.isReadingRoom();
13+
const allowUnpublished = event.userToken.isSuperUser();
14+
15+
const esResponse = await getFileSet(id, { allowPrivate, allowUnpublished });
16+
if (esResponse.statusCode !== 200) {
17+
return await opensearchResponse.transform(esResponse);
18+
}
19+
20+
const body = JSON.parse(esResponse.body);
21+
const annotations = body?._source?.annotations ?? null;
22+
23+
return {
24+
statusCode: 200,
25+
headers: {
26+
"content-type": "application/json",
27+
},
28+
body: JSON.stringify({
29+
data: annotations,
30+
info: appInfo(),
31+
}),
32+
};
33+
});

api/template.yaml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,30 @@ Resources:
334334
ApiId: !Ref dcApi
335335
Path: /file-sets/{id}
336336
Method: HEAD
337+
getFileSetAnnotationsFunction:
338+
Type: AWS::Serverless::Function
339+
Condition: DeployAPI
340+
Properties:
341+
Handler: handlers/get-file-set-annotations.handler
342+
Description: Gets annotations for a FileSet.
343+
#* Layers:
344+
#* - !Ref apiDependencies
345+
Policies:
346+
- !Ref SecretsPolicy
347+
- !Ref readIndexPolicy
348+
Events:
349+
ApiGet:
350+
Type: HttpApi
351+
Properties:
352+
ApiId: !Ref dcApi
353+
Path: /file-sets/{id}/annotations
354+
Method: GET
355+
ApiHead:
356+
Type: HttpApi
357+
Properties:
358+
ApiId: !Ref dcApi
359+
Path: /file-sets/{id}/annotations
360+
Method: HEAD
337361
getFileSetAuthFunction:
338362
Type: AWS::Serverless::Function
339363
Condition: DeployAPI
@@ -969,4 +993,4 @@ Resources:
969993
</table>
970994
<!--[if gte mso 15]></td></tr></table><![endif]-->
971995
</body>
972-
</html>
996+
</html>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"_index": "dev-dc-v2-file-set",
3+
"_type": "_doc",
4+
"_id": "1234",
5+
"_version": 1,
6+
"found": true,
7+
"_source": {
8+
"id": "1234",
9+
"api_model": "FileSet",
10+
"visibility": "Public",
11+
"published": true,
12+
"mime_type": "audio/mpeg",
13+
"annotations": [
14+
{
15+
"id": "36a47020-5410-4dda-a7ca-967fe3885bcd",
16+
"type": "transcription",
17+
"language": ["lg", "en"],
18+
"content": "ABAVUBI N'ABAKOLA EMIRIMU EGYEKUUSA KU KUVUBA TWENYIGIRE MU KUZIYIZA AKAWUKA AKALEETA SIRIIMU\n\nWuumira imo mukyala esezenga ye tituufu obuseero bwe bojja twekebezze, otte nobujjuuirizi ayeibuli\n\nMUSONZI LC\n\nMwami eno naffe Tubye twekebezza ekwanke ku kuvuba, kyokka tusobola otulunge emirinu ayeffe.\n\nLAKE VICTORIA FISHERIES ORGANISATION OF THE EAST AFRICAN COMMUNITY (LVFO)\nFIGHTING VECTOR-BORNE DISEASES\nPROMOTED GOOD ENVIRONMENTAL\n\nKALANGALA DISTRICT FORUM OF PEOPLE LIVING WITH HIV/AIDS HAS NO MORE (KADIFOHM)\nTHE LIFE YOU SAVE IS YOUR OWN\nUnited for a positive difference",
19+
"model": "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
20+
}
21+
]
22+
}
23+
}

api/test/integration/get-doc.test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,43 @@ describe("Doc retrieval routes", () => {
169169
});
170170
});
171171

172+
describe("GET /file-sets/{id}/annotations", () => {
  const { handler } = requireSource("handlers/get-file-set-annotations");

  // Registers a 200 reply for the file-set document request using the given
  // fixture, then invokes the handler for id 1234 and resolves to its response.
  const requestAnnotations = async (fixturePath) => {
    mock
      .get("/dc-v2-file-set/_doc/1234")
      .reply(200, helpers.testFixture(fixturePath));

    const annotationsEvent = helpers
      .mockEvent("GET", "/file-sets/{id}/annotations")
      .pathParams({ id: 1234 })
      .render();

    return handler(annotationsEvent);
  };

  it("returns annotations for a file-set", async () => {
    const response = await requestAnnotations(
      "mocks/fileset-annotated-1234.json"
    );
    expect(response.statusCode).to.eq(200);

    const { data } = JSON.parse(response.body);
    expect(data).to.be.an("array").with.lengthOf(1);
    expect(data[0].type).to.eq("transcription");
  });

  it("returns null when no annotations exist", async () => {
    const response = await requestAnnotations("mocks/fileset-1234.json");
    expect(response.statusCode).to.eq(200);

    const { data } = JSON.parse(response.body);
    expect(data).to.eq(null);
  });
});
208+
172209
describe("Superuser", () => {
173210
helpers.saveEnvironment();
174211
let event;

chat/dependencies/requirements.txt

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
# This file was autogenerated by uv via the following command:
2+
# uv export --format requirements-txt --no-hashes
3+
annotated-types==0.7.0
4+
# via pydantic
5+
anyio==4.11.0
6+
# via
7+
# httpx
8+
# openai
9+
boto3==1.40.47
10+
# via
11+
# dc-api-v2-chat
12+
# langchain-aws
13+
botocore==1.40.47
14+
# via
15+
# boto3
16+
# s3transfer
17+
certifi==2025.10.5
18+
# via
19+
# httpcore
20+
# httpx
21+
# opensearch-py
22+
# requests
23+
charset-normalizer==3.4.3
24+
# via requests
25+
colorama==0.4.6 ; sys_platform == 'win32'
26+
# via tqdm
27+
distro==1.9.0
28+
# via openai
29+
events==0.5
30+
# via opensearch-py
31+
greenlet==3.2.4 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
32+
# via sqlalchemy
33+
h11==0.16.0
34+
# via httpcore
35+
honeybadger==0.23.1
36+
# via dc-api-v2-chat
37+
httpcore==1.0.9
38+
# via httpx
39+
httpx==0.28.1
40+
# via
41+
# langgraph-sdk
42+
# langsmith
43+
# openai
44+
idna==3.10
45+
# via
46+
# anyio
47+
# httpx
48+
# requests
49+
jiter==0.11.0
50+
# via openai
51+
jmespath==1.0.1
52+
# via
53+
# boto3
54+
# botocore
55+
jsonpatch==1.33
56+
# via langchain-core
57+
jsonpointer==3.0.0
58+
# via jsonpatch
59+
langchain==0.3.27
60+
# via dc-api-v2-chat
61+
langchain-aws==0.2.35
62+
# via dc-api-v2-chat
63+
langchain-core==0.3.78
64+
# via
65+
# langchain
66+
# langchain-aws
67+
# langchain-openai
68+
# langchain-text-splitters
69+
# langgraph
70+
# langgraph-checkpoint
71+
# langgraph-prebuilt
72+
langchain-openai==0.3.35
73+
# via dc-api-v2-chat
74+
langchain-text-splitters==0.3.11
75+
# via langchain
76+
langgraph==0.6.8
77+
# via dc-api-v2-chat
78+
langgraph-checkpoint==2.1.2
79+
# via
80+
# langgraph
81+
# langgraph-prebuilt
82+
langgraph-prebuilt==0.6.4
83+
# via langgraph
84+
langgraph-sdk==0.2.9
85+
# via langgraph
86+
langsmith==0.4.33
87+
# via
88+
# langchain
89+
# langchain-core
90+
numpy==2.2.6
91+
# via
92+
# dc-api-v2-chat
93+
# langchain-aws
94+
openai==1.109.1
95+
# via
96+
# dc-api-v2-chat
97+
# langchain-openai
98+
opensearch-py==2.8.0
99+
# via dc-api-v2-chat
100+
orjson==3.11.3
101+
# via
102+
# langgraph-sdk
103+
# langsmith
104+
ormsgpack==1.10.0
105+
# via langgraph-checkpoint
106+
packaging==25.0
107+
# via
108+
# langchain-core
109+
# langsmith
110+
psutil==7.1.0
111+
# via honeybadger
112+
pydantic==2.12.0
113+
# via
114+
# langchain
115+
# langchain-aws
116+
# langchain-core
117+
# langgraph
118+
# langsmith
119+
# openai
120+
pydantic-core==2.41.1
121+
# via pydantic
122+
pyjwt==2.6.0
123+
# via dc-api-v2-chat
124+
python-dateutil==2.9.0.post0
125+
# via
126+
# botocore
127+
# opensearch-py
128+
python-dotenv==1.0.1
129+
# via dc-api-v2-chat
130+
pyyaml==6.0.3
131+
# via
132+
# langchain
133+
# langchain-core
134+
regex==2025.9.18
135+
# via tiktoken
136+
requests==2.32.5
137+
# via
138+
# dc-api-v2-chat
139+
# langchain
140+
# langsmith
141+
# opensearch-py
142+
# requests-aws4auth
143+
# requests-toolbelt
144+
# tiktoken
145+
requests-aws4auth==1.3.1
146+
# via dc-api-v2-chat
147+
requests-toolbelt==1.0.0
148+
# via langsmith
149+
s3transfer==0.14.0
150+
# via boto3
151+
six==1.17.0
152+
# via
153+
# honeybadger
154+
# python-dateutil
155+
sniffio==1.3.1
156+
# via
157+
# anyio
158+
# openai
159+
sqlalchemy==2.0.43
160+
# via langchain
161+
tenacity==9.1.2
162+
# via langchain-core
163+
tiktoken==0.11.0
164+
# via
165+
# dc-api-v2-chat
166+
# langchain-openai
167+
tqdm==4.67.1
168+
# via openai
169+
typing-extensions==4.15.0
170+
# via
171+
# anyio
172+
# langchain-core
173+
# openai
174+
# pydantic
175+
# pydantic-core
176+
# sqlalchemy
177+
# typing-inspection
178+
typing-inspection==0.4.2
179+
# via pydantic
180+
urllib3==2.5.0
181+
# via
182+
# botocore
183+
# opensearch-py
184+
# requests
185+
wheel==0.45.1
186+
# via dc-api-v2-chat
187+
xxhash==3.6.0
188+
# via langgraph
189+
zstandard==0.25.0
190+
# via langsmith

docs/docs/spec/openapi.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,16 @@ paths:
168168
responses:
169169
200:
170170
$ref: "./types.yaml#/components/responses/DocumentResponse"
171+
/file-sets/{id}/annotations:
172+
get:
173+
operationId: getFileSetAnnotations
174+
tags:
175+
- FileSet
176+
parameters:
177+
- $ref: "./types.yaml#/components/parameters/id"
178+
responses:
179+
200:
180+
$ref: "./types.yaml#/components/responses/AnnotationsResponse"
171181
/file-sets/{id}/authorization:
172182
get:
173183
operationId: getFileSetAuth

docs/docs/spec/types.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,20 @@ components:
115115
$ref: "#/components/schemas/IndexDocument"
116116
info:
117117
type: object
118+
AnnotationsResponse:
119+
description: Annotations for a FileSet
120+
content:
121+
application/json:
122+
schema:
123+
type: object
124+
properties:
125+
data:
126+
type: array
127+
items:
128+
$ref: "./data-types.yaml#/components/schemas/Annotation"
129+
nullable: true
130+
info:
131+
$ref: "./data-types.yaml#/components/schemas/Info"
118132
SearchResponse:
119133
description: A page of search results
120134
content:

0 commit comments

Comments
 (0)