From b6d0834e4db9748e83124a56d09ff1b9a192952e Mon Sep 17 00:00:00 2001
From: Jayesh45-master
Date: Sat, 27 Dec 2025 23:55:09 +0530
Subject: [PATCH] create an adl search service

---
Review notes (this section is ignored by git-am):
- The patch text had been collapsed onto a few physical lines; line breaks
  and indentation are reconstructed to match the per-file hunk line counts.
- app/main.py was revised during review:
  * startup reads `get_collections().collections` (there is no `.result`),
    creates the collection with `create_collection`, and logs failures
    instead of swallowing them;
  * ADL ids are mapped to deterministic UUIDs, because Qdrant rejects point
    ids such as "adl-1"; the original id is kept in the payload and returned
    by /query;
  * `top_k` must be >= 1, and a null `top_k` falls back to the default;
  * whitespace-only conversations are rejected with 400.
- The `index` blob id of app/main.py below was not recomputed after these
  edits.

 adl-search-service/Dockerfile       |   9 ++
 adl-search-service/README.md        |  29 ++++++
 adl-search-service/app/main.py      | 141 ++++++++++++++++++++++++++++
 adl-search-service/requirements.txt |   6 ++
 adl-search-service/sample_adls.json |  20 ++++
 5 files changed, 205 insertions(+)
 create mode 100644 adl-search-service/Dockerfile
 create mode 100644 adl-search-service/README.md
 create mode 100644 adl-search-service/app/main.py
 create mode 100644 adl-search-service/requirements.txt
 create mode 100644 adl-search-service/sample_adls.json

diff --git a/adl-search-service/Dockerfile b/adl-search-service/Dockerfile
new file mode 100644
index 00000000..36bfd16b
--- /dev/null
+++ b/adl-search-service/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . /app
+ENV QDRANT_HOST=qdrant
+ENV QDRANT_PORT=6333
+EXPOSE 8000
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/adl-search-service/README.md b/adl-search-service/README.md
new file mode 100644
index 00000000..6218c75e
--- /dev/null
+++ b/adl-search-service/README.md
@@ -0,0 +1,29 @@
+# ADL Search Service
+
+This service indexes ADLs into Qdrant and returns semantically related ADLs for a conversation.
+
+Run locally (assumes Qdrant running on localhost:6333):
+
+```bash
+python -m pip install -r requirements.txt
+uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
+```
+
+Environment variables:
+- `QDRANT_HOST` (default: `localhost`)
+- `QDRANT_PORT` (default: `6333`)
+- `QDRANT_COLLECTION` (default: `adl_collection`)
+
+Index sample ADLs:
+
+```bash
+curl -X POST "http://localhost:8000/index" -H "Content-Type: application/json" \
+  -d @sample_adls.json
+```
+
+Query example:
+
+```bash
+curl -X POST "http://localhost:8000/query" -H "Content-Type: application/json" \
+  -d '{"conversation": "I need something that parses PDFs and extracts invoice line items", "top_k": 3}'
+```
diff --git a/adl-search-service/app/main.py b/adl-search-service/app/main.py
new file mode 100644
index 00000000..80645c9e
--- /dev/null
+++ b/adl-search-service/app/main.py
@@ -0,0 +1,141 @@
+from typing import List, Optional
+import logging
+import os
+import json
+import uuid
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from sentence_transformers import SentenceTransformer
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Distance, PointStruct, VectorParams
+
+QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
+QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
+COLLECTION_NAME = os.getenv("QDRANT_COLLECTION", "adl_collection")
+DEFAULT_TOP_K = 5
+
+logger = logging.getLogger(__name__)
+
+app = FastAPI(title="ADL Search Service")
+
+
+class ADL(BaseModel):
+    """A single ADL document submitted for indexing."""
+
+    id: str
+    title: Optional[str]
+    content: str
+    metadata: Optional[dict] = None
+
+
+class ConversationRequest(BaseModel):
+    """Query payload: conversation text plus the number of hits wanted."""
+
+    conversation: str
+    top_k: Optional[int] = Field(DEFAULT_TOP_K, ge=1)
+
+
+def get_model():
+    """Load the sentence-embedding model used for indexing and querying."""
+    return SentenceTransformer("all-MiniLM-L6-v2")
+
+
+def get_qdrant_client():
+    """Build a Qdrant client from QDRANT_HOST and QDRANT_PORT."""
+    return QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+
+
+def point_id_for(adl_id: str) -> str:
+    """Map an arbitrary ADL id onto a point id that Qdrant accepts.
+
+    Qdrant only allows unsigned integers or UUIDs as point ids, so ids such
+    as "adl-1" are rejected. uuid5 is deterministic, so re-indexing the same
+    ADL overwrites its point instead of adding a duplicate.
+    """
+    return str(uuid.uuid5(uuid.NAMESPACE_URL, adl_id))
+
+
+@app.on_event("startup")
+def startup_event():
+    """Load the model, connect to Qdrant, create the collection if missing."""
+    global model, qdrant
+    model = get_model()
+    qdrant = get_qdrant_client()
+    try:
+        # get_collections() returns a CollectionsResponse; the list is in
+        # `.collections` (there is no `.result` attribute).
+        existing = {c.name for c in qdrant.get_collections().collections}
+        if COLLECTION_NAME not in existing:
+            # create_collection never drops data, unlike recreate_collection.
+            qdrant.create_collection(
+                collection_name=COLLECTION_NAME,
+                vectors_config=VectorParams(
+                    size=model.get_sentence_embedding_dimension(),
+                    distance=Distance.COSINE,
+                ),
+            )
+    except Exception:
+        # Best effort: keep the API (and /health) up when Qdrant is not
+        # reachable yet, but log the failure instead of hiding it.
+        logger.exception("Could not ensure collection %r exists", COLLECTION_NAME)
+
+
+@app.post("/index")
+def index_adls(adls: List[ADL]):
+    """Embed each ADL's content and upsert it into the collection.
+
+    Returns {"indexed": <count>}; responds 400 for an empty list.
+    """
+    if not adls:
+        raise HTTPException(status_code=400, detail="No ADLs provided")
+    texts = [a.content for a in adls]
+    embeddings = model.encode(texts, show_progress_bar=False).tolist()
+    points = [
+        PointStruct(
+            id=point_id_for(adl.id),
+            vector=vector,
+            # Keep the caller's id in the payload so /query can return it.
+            payload={"adl_id": adl.id, "title": adl.title, "metadata": adl.metadata},
+        )
+        for adl, vector in zip(adls, embeddings)
+    ]
+    qdrant.upsert(collection_name=COLLECTION_NAME, points=points)
+    return {"indexed": len(points)}
+
+
+@app.post("/query")
+def query_adls(req: ConversationRequest):
+    """Return the stored ADLs most similar to the conversation text.
+
+    Responds 400 when the conversation is empty or whitespace only.
+    """
+    if not req.conversation.strip():
+        raise HTTPException(status_code=400, detail="Conversation text required")
+    # An explicit `"top_k": null` passes validation; use the default then.
+    limit = DEFAULT_TOP_K if req.top_k is None else req.top_k
+    q_emb = model.encode([req.conversation])[0].tolist()
+    search_result = qdrant.search(collection_name=COLLECTION_NAME, query_vector=q_emb, limit=limit)
+    results = []
+    for r in search_result:
+        payload = dict(r.payload or {})
+        results.append({
+            # Report the caller's ADL id; fall back to the point id for
+            # points that carry no "adl_id" in their payload.
+            "id": payload.pop("adl_id", r.id),
+            "score": r.score,
+            "payload": payload,
+        })
+    return {"results": results}
+
+
+@app.get("/health")
+def health():
+    """Liveness probe; does not check Qdrant or the model."""
+    return {"status": "ok"}
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/adl-search-service/requirements.txt b/adl-search-service/requirements.txt
new file mode 100644
index 00000000..3dfbba30
--- /dev/null
+++ b/adl-search-service/requirements.txt
@@ -0,0 +1,6 @@
+fastapi==0.95.2
+uvicorn[standard]==0.22.0
+qdrant-client==1.13.0
+sentence-transformers==2.2.2
+pydantic==1.10.12
+python-dotenv==1.0.0
\ No newline at end of file
diff --git a/adl-search-service/sample_adls.json b/adl-search-service/sample_adls.json
new file mode 100644
index 00000000..6da6c037
--- /dev/null
+++ b/adl-search-service/sample_adls.json
@@ -0,0 +1,20 @@
+[
+  {
+    "id": "adl-1",
+    "title": "Weather Data Collector",
+    "content": "Collects temperature and humidity from IoT sensors every 5 minutes and stores aggregated metrics.",
+    "metadata": {"domain": "weather", "created_by": "team-a"}
+  },
+  {
+    "id": "adl-2",
+    "title": "User Onboarding Assistant",
+    "content": "Guides new users through account setup, email verification, and initial preferences.",
+    "metadata": {"domain": "ux", "created_by": "team-b"}
+  },
+  {
+    "id": "adl-3",
+    "title": "Invoice Processor",
+    "content": "Parses incoming invoice PDFs, extracts line items and posts them to accounting system.",
+    "metadata": {"domain": "finance", "created_by": "team-c"}
+  }
+]