Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added .env.example
Empty file.
49 changes: 48 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,48 @@
.env
# Environment variables
.env
.env.local

# Python bytecode
__pycache__/
*.py[cod]
*$py.class
*.so

# Virtual environments
venv/
.venv/
env/

# Distribution / packaging
dist/
build/
*.egg-info/
*.egg

# Testing / Coverage
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/

# IDEs
.idea/
.vscode/
*.swp
*.swo

# Jupyter
.ipynb_checkpoints/

# mypy / type checkers
.mypy_cache/
.dmypy.json
dmypy.json
.pytype/

# Logs
*.log

# Test reports
test_report.txt
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,29 @@
1. Create a `.env` file in the root directory of the project
2. Copy the contents of the provided `.env.example` file into your local `.env` file and fill in the required values

## Running the API

Start the server:
```bash
uvicorn app.main:app --reload
```

The API will be available at `http://localhost:8000`

### API Documentation

- Swagger UI: `http://localhost:8000/docs`
- ReDoc: `http://localhost:8000/redoc`

### Available Endpoints

| Method | Endpoint | Description |
|--------|----------|-------------|
| POST | `/transcripts/analyze` | Analyze a single transcript |
| GET | `/transcripts/{id}` | Get analysis by ID |
| POST | `/transcripts/analyze/batch` | Analyze multiple transcripts concurrently |
| GET | `/health` | Health check |

## Running Tests

To run the tests, make sure you have:
Expand Down
Binary file removed app/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed app/__pycache__/configurations.cpython-312.pyc
Binary file not shown.
Binary file removed app/adapters/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed app/adapters/__pycache__/openai.cpython-312.pyc
Binary file not shown.
19 changes: 19 additions & 0 deletions app/adapters/in_memory_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""In-memory implementation of the transcript repository."""

from app.domain.models import TranscriptAnalysis
from app.ports.repository import TranscriptRepository


class InMemoryTranscriptRepository(TranscriptRepository):
    """Dictionary-backed transcript store; data lives only for the process lifetime."""

    def __init__(self) -> None:
        # Maps analysis ID -> stored TranscriptAnalysis.
        self._items: dict[str, TranscriptAnalysis] = {}

    def save(self, analysis: TranscriptAnalysis) -> None:
        """Store an analysis indexed by its ID (overwrites any existing entry)."""
        self._items[analysis.id] = analysis

    def get_by_id(self, analysis_id: str) -> TranscriptAnalysis | None:
        """Retrieve an analysis by ID, or None if not found."""
        try:
            return self._items[analysis_id]
        except KeyError:
            return None
Empty file added app/api/__init__.py
Empty file.
32 changes: 32 additions & 0 deletions app/api/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""FastAPI dependency injection configuration."""

from functools import lru_cache

from app.adapters.in_memory_repo import InMemoryTranscriptRepository
from app.adapters.openai import OpenAIAdapter
from app.application.services import TranscriptService
from app.configurations import EnvConfigs
from app.ports.repository import TranscriptRepository


@lru_cache
def get_settings() -> EnvConfigs:
    """Get cached application settings.

    @lru_cache makes this a process-wide singleton: EnvConfigs is
    constructed (and the environment read) only once per process.
    """
    return EnvConfigs()


@lru_cache
def get_repository() -> TranscriptRepository:
    """Get singleton repository instance.

    Cached so every request shares the same in-memory store; contents
    are lost when the process exits.
    """
    return InMemoryTranscriptRepository()


def get_transcript_service() -> TranscriptService:
    """Create and return the transcript service with dependencies.

    The repository is a cached singleton; the LLM adapter is rebuilt per
    call from the cached settings.
    """
    config = get_settings()
    adapter = OpenAIAdapter(api_key=config.OPENAI_API_KEY, model=config.OPENAI_MODEL)
    return TranscriptService(llm=adapter, repository=get_repository())
76 changes: 76 additions & 0 deletions app/api/routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""API route definitions for transcript analysis."""

from fastapi import APIRouter, Depends, HTTPException, status

from app.api.dependencies import get_transcript_service
from app.api.schemas import (
AnalyzeRequest,
AnalyzeResponse,
BatchAnalyzeRequest,
BatchAnalyzeResponse,
)
from app.application.services import TranscriptService
from app.domain.models import TranscriptAnalysis

router = APIRouter(prefix="/transcripts", tags=["transcripts"])


def _to_response(analysis: TranscriptAnalysis) -> AnalyzeResponse:
    """Map a domain TranscriptAnalysis onto the API response schema."""
    payload = {
        "id": analysis.id,
        "summary": analysis.summary,
        "action_items": analysis.action_items,
    }
    return AnalyzeResponse(**payload)


@router.post(
    "/analyze",
    response_model=AnalyzeResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Analyze a transcript",
    description="Analyzes a transcript and returns a summary with action items.",
)
def analyze_transcript(
    request: AnalyzeRequest,
    service: TranscriptService = Depends(get_transcript_service),
) -> AnalyzeResponse:
    """Run analysis on one transcript and return the persisted result."""
    # Service analyzes and saves; we only translate the domain model out.
    return _to_response(service.analyze(request.transcript))


@router.get(
    "/{transcript_id}",
    response_model=AnalyzeResponse,
    summary="Get analysis by ID",
    description="Retrieves a previously analyzed transcript by its ID.",
)
def get_transcript(
    transcript_id: str,
    service: TranscriptService = Depends(get_transcript_service),
) -> AnalyzeResponse:
    """Retrieve a transcript analysis by ID; 404 if the ID is unknown."""
    analysis = service.get_by_id(transcript_id)
    if analysis is not None:
        return _to_response(analysis)
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Transcript analysis not found",
    )


@router.post(
    "/analyze/batch",
    response_model=BatchAnalyzeResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Analyze multiple transcripts",
    description="Analyzes multiple transcripts concurrently.",
)
async def analyze_batch(
    request: BatchAnalyzeRequest,
    service: TranscriptService = Depends(get_transcript_service),
) -> BatchAnalyzeResponse:
    """Analyze multiple transcripts concurrently; results follow input order."""
    analyses = await service.analyze_batch(request.transcripts)
    return BatchAnalyzeResponse(results=list(map(_to_response, analyses)))
46 changes: 46 additions & 0 deletions app/api/schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Request and response schemas for the API."""

import pydantic


class AnalyzeRequest(pydantic.BaseModel):
    """Request body for transcript analysis."""

    transcript: str = pydantic.Field(..., min_length=1)

    @pydantic.field_validator("transcript")
    @classmethod
    def validate_transcript_not_empty(cls, v: str) -> str:
        """Reject transcripts that are empty or whitespace-only."""
        if v.strip() == "":
            raise ValueError("Transcript cannot be empty or contain only whitespace")
        return v


class AnalyzeResponse(pydantic.BaseModel):
    """Response body for a single transcript analysis."""

    # Identifier assigned when the analysis was stored; usable with GET /transcripts/{id}.
    id: str
    # LLM-generated summary of the transcript.
    summary: str
    # Action items extracted from the transcript.
    action_items: list[str]


class BatchAnalyzeRequest(pydantic.BaseModel):
    """Request body for batch transcript analysis."""

    transcripts: list[str] = pydantic.Field(..., min_length=1)

    @pydantic.field_validator("transcripts")
    @classmethod
    def validate_transcripts_not_empty(cls, v: list[str]) -> list[str]:
        """Ensure no transcript in the list is empty or whitespace-only."""
        # For str values, `not item.strip()` covers both the empty string
        # and whitespace-only cases.
        for i, item in enumerate(v):
            if not item.strip():
                raise ValueError(f"Transcript at index {i} cannot be empty")
        return v


class BatchAnalyzeResponse(pydantic.BaseModel):
    """Response body for batch transcript analysis."""

    # One AnalyzeResponse per input transcript, in request order
    # (asyncio.gather preserves ordering in the service layer).
    results: list[AnalyzeResponse]
Empty file added app/application/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions app/application/dtos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""DTOs for LLM structured output responses."""

import pydantic


class AnalysisLLMResponse(pydantic.BaseModel):
    """Response structure expected from the LLM analysis."""

    # Prose summary produced by the model.
    summary: str
    # Concrete follow-up tasks the model extracted from the transcript.
    action_items: list[str]
62 changes: 62 additions & 0 deletions app/application/services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Application services for transcript analysis."""

import asyncio

from app.application.dtos import AnalysisLLMResponse
from app.domain.models import TranscriptAnalysis
from app.ports.llm import LLm
from app.ports.repository import TranscriptRepository
from app.prompts import RAW_USER_PROMPT, SYSTEM_PROMPT


class TranscriptService:
    """Orchestrates transcript analysis using an LLM port and a repository.

    The sync and async analysis paths share the same persist step via
    `_store`, so the domain-model construction cannot drift between them.
    """

    def __init__(self, llm: LLm, repository: TranscriptRepository) -> None:
        self._llm = llm
        self._repository = repository

    def _store(self, llm_response: AnalysisLLMResponse) -> TranscriptAnalysis:
        """Build a domain model from the LLM response, persist it, and return it."""
        analysis = TranscriptAnalysis(
            summary=llm_response.summary,
            action_items=llm_response.action_items,
        )
        self._repository.save(analysis)
        return analysis

    def analyze(self, transcript: str) -> TranscriptAnalysis:
        """Analyze a transcript synchronously and persist the result."""
        user_prompt = RAW_USER_PROMPT.format(transcript=transcript)

        llm_response: AnalysisLLMResponse = self._llm.run_completion(
            system_prompt=SYSTEM_PROMPT,
            user_prompt=user_prompt,
            dto=AnalysisLLMResponse,
        )
        return self._store(llm_response)

    async def analyze_async(self, transcript: str) -> TranscriptAnalysis:
        """Analyze a transcript asynchronously and persist the result."""
        user_prompt = RAW_USER_PROMPT.format(transcript=transcript)

        llm_response: AnalysisLLMResponse = await self._llm.run_completion_async(
            system_prompt=SYSTEM_PROMPT,
            user_prompt=user_prompt,
            dto=AnalysisLLMResponse,
        )
        return self._store(llm_response)

    async def analyze_batch(self, transcripts: list[str]) -> list[TranscriptAnalysis]:
        """Analyze multiple transcripts concurrently; results follow input order."""
        tasks = [self.analyze_async(transcript) for transcript in transcripts]
        return await asyncio.gather(*tasks)

    def get_by_id(self, analysis_id: str) -> TranscriptAnalysis | None:
        """Retrieve an analysis by its ID, or None if not found."""
        return self._repository.get_by_id(analysis_id)
6 changes: 3 additions & 3 deletions app/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@


class EnvConfigs(pydantic_settings.BaseSettings):
    """Application settings loaded from the environment and the `.env` file."""

    # Removed the duplicated/dead first `model_config` assignment; only one
    # assignment is needed.
    model_config = pydantic_settings.SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8"
    )

    # Required: OpenAI API key (no default — startup fails fast if missing).
    OPENAI_API_KEY: str
    # Chat model used for analysis; pinned to a dated snapshot by default.
    OPENAI_MODEL: str = "gpt-4o-2024-08-06"


Empty file added app/domain/__init__.py
Empty file.
15 changes: 15 additions & 0 deletions app/domain/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Domain models for transcript analysis."""

import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone


def _new_analysis_id() -> str:
    """Generate a unique UUID4 string identifier for a new analysis."""
    return str(uuid.uuid4())


def _utc_now() -> datetime:
    """Return the current timezone-aware timestamp in UTC."""
    return datetime.now(timezone.utc)


@dataclass
class TranscriptAnalysis:
    """Represents an analyzed transcript with summary and action items."""

    summary: str
    action_items: list[str]
    # Auto-generated; unique per instance.
    id: str = field(default_factory=_new_analysis_id)
    # Creation timestamp, always UTC-aware.
    created_at: datetime = field(default_factory=_utc_now)
19 changes: 19 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""FastAPI application entry point."""

from fastapi import FastAPI

from app.api.routes import router

def _create_app() -> FastAPI:
    """Build and configure the FastAPI application instance."""
    application = FastAPI(
        title="Transcript Analysis API",
        description="API for analyzing coaching session transcripts using AI",
        version="1.0.0",
    )
    application.include_router(router)
    return application


# Module-level instance so `uvicorn app.main:app` keeps working.
app = _create_app()


@app.get("/health", tags=["health"])
def health_check() -> dict[str, str]:
    """Health check endpoint."""
    return {"status": "healthy"}
3 changes: 2 additions & 1 deletion app/ports/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Re-export the package's public ports. The duplicated `LLm` import line
# (diff residue) has been removed.
from app.ports.llm import LLm
from app.ports.repository import TranscriptRepository
Binary file removed app/ports/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed app/ports/__pycache__/llm.cpython-312.pyc
Binary file not shown.
Loading