jsugg · jsugg · Nov 6, 2025
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -0,0 +1,26 @@
+name: Run tests
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # keep the other interpreter versions running when one leg fails
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12"]  # 3.13 omitted: pinned torch==2.2.2 / numba==0.59.1 do not support it
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - uses: astral-sh/setup-uv@v2
+      - name: Install dependencies
+        run: uv sync --dev  # project dependencies plus the dev tools declared under [tool.uv]
+      - name: Run tests
+        run: uv run pytest
diff --git a/Pipfile b/Pipfile
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,91 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ser"
+version = "0.1.0"
+description = "Speech Emotion Recognition toolkit"
+readme = "README.md"
+requires-python = ">=3.9"  # NOTE(review): no upper bound, yet pins such as torch==2.2.2 appear to lack Python 3.13 wheels - confirm the intended range
+license = {text = "MIT"}
+authors = [{name = "Juan Sugg", email = "juanpedrosugg@gmail.com"}]
+dependencies = [  # exact version pins; the text after ';' on each entry is a PEP 508 environment marker
+    "antlr4-python3-runtime==4.9.3",
+    "audioread==3.0.1; python_version >= '3.6'",
+    "certifi==2024.2.2; python_version >= '3.6'",
+    "cffi==1.16.0; python_version >= '3.8'",
+    "charset-normalizer==3.3.2; python_full_version >= '3.7.0'",
+    "cloudpickle==3.0.0; python_version >= '3.8'",
+    "colored==2.2.4; python_version >= '3.9'",
+    "decorator==5.1.1; python_version >= '3.5'",
+    "demucs==4.0.1; python_full_version >= '3.8.0'",
+    "dora-search==0.1.12; python_full_version >= '3.7.0'",
+    "einops==0.8.0; python_version >= '3.8'",
+    "ffmpeg-python==0.2.0",
+    "filelock==3.14.0; python_version >= '3.8'",
+    "fsspec==2024.5.0; python_version >= '3.8'",
+    "future==1.0.0; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'",
+    "huggingface-hub==0.23.1; python_full_version >= '3.8.0'",
+    "idna==3.7; python_version >= '3.5'",
+    "jinja2==3.1.4; python_version >= '3.7'",
+    "joblib==1.4.2; python_version >= '3.8'",
+    "julius==0.2.7; python_full_version >= '3.6.0'",
+    "lameenc==1.7.0",
+    "lazy-loader==0.4; python_version >= '3.7'",
+    "librosa==0.10.2.post1; python_version >= '3.7'",
+    "llvmlite==0.42.0; python_version >= '3.9'",
+    "markupsafe==2.1.5; python_version >= '3.7'",
+    "more-itertools==10.2.0; python_version >= '3.8'",
+    "mpmath==1.3.0",
+    "msgpack==1.0.8; python_version >= '3.8'",
+    "networkx==3.3; python_version >= '3.10'",  # marker excludes Python 3.9, so this pin does not apply there
+    "numba==0.59.1; python_version >= '3.9'",
+    "numpy==1.26.2; python_version >= '3.9'",
+    "omegaconf==2.3.0; python_version >= '3.6'",
+    "openai-whisper==20231106; python_version >= '3.8'",
+    "openunmix==1.3.0; python_version >= '3.9'",
+    "packaging==24.0; python_version >= '3.7'",
+    "platformdirs==4.2.2; python_version >= '3.8'",
+    "pooch==1.8.1; python_version >= '3.7'",
+    "psutil==5.9.8; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
+    "pycparser==2.22; python_version >= '3.8'",
+    "python-dotenv==1.0.1; python_version >= '3.8'",
+    "pyyaml==6.0.1; python_version >= '3.6'",
+    "regex==2024.5.15; python_version >= '3.8'",
+    "requests==2.32.2; python_version >= '3.8'",
+    "retrying==1.3.4",
+    "safetensors==0.4.3; python_version >= '3.7'",
+    "scikit-learn==1.3.2; python_version >= '3.8'",
+    "scipy==1.13.0; python_version >= '3.9'",
+    "six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
+    "soundfile==0.12.1",
+    "soxr==0.3.7; python_version >= '3.6'",
+    "stable-ts==2.13.3; python_version >= '3.8'",
+    "submitit==1.5.1; python_version >= '3.8'",
+    "sympy==1.12; python_version >= '3.8'",
+    "threadpoolctl==3.5.0; python_version >= '3.8'",
+    "tiktoken==0.7.0; python_version >= '3.8'",
+    "tokenizers==0.19.1; python_version >= '3.7'",
+    "torch==2.2.2; python_full_version >= '3.8.0'",
+    "torchaudio==2.2.2",
+    "tqdm==4.66.1; python_version >= '3.7'",
+    "transformers==4.41.1; python_full_version >= '3.8.0'",
+    "treetable==0.2.5; python_full_version >= '3.6.0'",
+    "typing-extensions==4.8.0; python_version >= '3.8'",
+    "urllib3==2.2.1; python_version >= '3.8'",
+]
+
+[project.scripts]
+ser = "ser.__main__:main"  # console command `ser` dispatches to main() in ser/__main__.py
+
+[tool.uv]
+dev-dependencies = [  # development-only tools; the CI workflow installs them with `uv sync --dev`
+    "pytest>=8.2",
+    "pytest-cov>=5.0",
+]
+
+[tool.pytest.ini_options]
+addopts = "-ra"  # -ra: print a short summary line for every test outcome except passes
+testpaths = ["tests"]  # directory pytest collects from when no path is given on the command line
+
diff --git a/requirements.txt b/requirements.txt
diff --git a/ser/__main__.py b/ser/__main__.py
@@ -18,10 +18,10 @@
 """
 
 import argparse
+import logging
 import sys
 import time
-import logging
-from typing import List, Tuple
+from pathlib import Path
 
 from ser.models.emotion_model import predict_emotions, train_model
 from ser.transcript import extract_transcript
@@ -31,6 +31,7 @@
     print_timeline,
     save_timeline_to_csv,
 )
+from ser.utils.subtitles import SubtitleGenerator, FORMATTERS, timeline_to_subtitles
 from ser.config import Config
 
 
@@ -65,6 +66,22 @@ def main() -> None:
         action="store_true",
         help="Save the transcript to a CSV file",
     )
+    parser.add_argument(
+        "--subtitle-format",
+        choices=tuple(FORMATTERS.keys()),
+        help=(
+            "Export the generated timeline as subtitles in the chosen format. "
+            "If omitted, the format is inferred from --subtitle-output when possible."
+        ),
+    )
+    parser.add_argument(
+        "--subtitle-output",
+        type=str,
+        help=(
+            "File path for the exported subtitle file. The format is inferred from "
+            "the extension when --subtitle-format is not provided."
+        ),
+    )
     args: argparse.Namespace = parser.parse_args()
 
     if args.train:
@@ -82,13 +99,56 @@ def main() -> None:
 
     logger.info(msg="Starting emotion prediction...")
     start_time = time.time()
-    emotions: List[Tuple[str, float, float]] = predict_emotions(args.file)
-    transcript: List[Tuple[str, float, float]] = extract_transcript(
+    emotions: list[tuple[str, float, float]] = predict_emotions(args.file)
+    transcript: list[tuple[str, float, float]] = extract_transcript(
         args.file, args.language
     )
-    timeline: list = build_timeline(transcript, emotions)
+    timeline: list[tuple[float, str, str]] = build_timeline(transcript, emotions)
     print_timeline(timeline)
 
+    if args.subtitle_format or args.subtitle_output:
+        if not args.subtitle_output:
+            logger.error(
+                msg="--subtitle-output is required to export subtitles.",
+            )
+            sys.exit(1)
+
+        subtitle_format: str | None = args.subtitle_format
+        if not subtitle_format:
+            subtitle_format = _infer_subtitle_format(args.subtitle_output)
+            if not subtitle_format:
+                logger.error(
+                    "Unable to infer subtitle format from %s. Provide --subtitle-format.",
+                    args.subtitle_output,
+                )
+                sys.exit(1)
+        else:
+            inferred_format: str | None = _infer_subtitle_format(args.subtitle_output)
+            if inferred_format and inferred_format != subtitle_format:
+                logger.info(
+                    "Using subtitle format %s (overriding inferred format %s from output path)",
+                    subtitle_format,
+                    inferred_format,
+                )
+
+        subtitles: list[tuple[float, float, str, str]] = timeline_to_subtitles(timeline)
+        if not subtitles:
+            logger.warning("Timeline did not produce any subtitle entries to export.")
+        else:
+            try:
+                generator = SubtitleGenerator(FORMATTERS[subtitle_format])
+                generator.generate_file(subtitles, args.subtitle_output)
+                logger.info(
+                    "Subtitle file exported to %s",
+                    args.subtitle_output,
+                )
+            except Exception as err:
+                logger.error(
+                    msg=f"Failed to export subtitles: {err}",
+                    exc_info=True,
+                )
+                sys.exit(1)
+
     if args.save_transcript:
         csv_file_name: str = save_timeline_to_csv(timeline, args.file)
         logger.info(msg=f"Timeline saved to {csv_file_name}")
@@ -98,5 +158,10 @@ def main() -> None:
     )
 
 
+def _infer_subtitle_format(output_path: str) -> "str | None":  # quoted: a bare `str | None` here is evaluated at def time and raises TypeError on Python 3.9
+    suffix: str = Path(output_path).suffix.lower().lstrip(".")  # file extension, lowercased, without the leading dot
+    return suffix if suffix in FORMATTERS else None  # None when the extension is not a key of FORMATTERS
+
+
 if __name__ == "__main__":
     main()