Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Continuous-integration workflow: installs the project with uv and runs pytest.
name: Run tests

# Runs on every pull request and on pushes to the main branch.
on:
pull_request:
push:
branches:
- main

jobs:
tests:
runs-on: ubuntu-latest
strategy:
# Let every matrix entry finish even when another Python version fails.
fail-fast: false
matrix:
# NOTE(review): pyproject.toml pins torch==2.2.2, numba==0.59.1 and numpy==1.26.2;
# these may not provide wheels for 3.13 - confirm this entry can install.
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- uses: astral-sh/setup-uv@v2
# --dev also installs the [tool.uv] dev-dependencies (pytest, pytest-cov).
- name: Install dependencies
run: uv sync --dev
- name: Run tests
run: uv run pytest
24 changes: 0 additions & 24 deletions Pipfile

This file was deleted.

91 changes: 91 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "ser"
version = "0.1.0"
description = "Speech Emotion Recognition toolkit"
readme = "README.md"
requires-python = ">=3.9"
license = {text = "MIT"}
authors = [{name = "Juan Sugg", email = "juanpedrosugg@gmail.com"}]
dependencies = [
"antlr4-python3-runtime==4.9.3",
"audioread==3.0.1; python_version >= '3.6'",
"certifi==2024.2.2; python_version >= '3.6'",
"cffi==1.16.0; python_version >= '3.8'",
"charset-normalizer==3.3.2; python_full_version >= '3.7.0'",
"cloudpickle==3.0.0; python_version >= '3.8'",
"colored==2.2.4; python_version >= '3.9'",
"decorator==5.1.1; python_version >= '3.5'",
"demucs==4.0.1; python_full_version >= '3.8.0'",
"dora-search==0.1.12; python_full_version >= '3.7.0'",
"einops==0.8.0; python_version >= '3.8'",
"ffmpeg-python==0.2.0",
"filelock==3.14.0; python_version >= '3.8'",
"fsspec==2024.5.0; python_version >= '3.8'",
"future==1.0.0; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'",
"huggingface-hub==0.23.1; python_full_version >= '3.8.0'",
"idna==3.7; python_version >= '3.5'",
"jinja2==3.1.4; python_version >= '3.7'",
"joblib==1.4.2; python_version >= '3.8'",
"julius==0.2.7; python_full_version >= '3.6.0'",
"lameenc==1.7.0",
"lazy-loader==0.4; python_version >= '3.7'",
"librosa==0.10.2.post1; python_version >= '3.7'",
"llvmlite==0.42.0; python_version >= '3.9'",
"markupsafe==2.1.5; python_version >= '3.7'",
"more-itertools==10.2.0; python_version >= '3.8'",
"mpmath==1.3.0",
"msgpack==1.0.8; python_version >= '3.8'",
"networkx==3.3; python_version >= '3.10'",
"numba==0.59.1; python_version >= '3.9'",
"numpy==1.26.2; python_version >= '3.9'",
"omegaconf==2.3.0; python_version >= '3.6'",
"openai-whisper==20231106; python_version >= '3.8'",
"openunmix==1.3.0; python_version >= '3.9'",
"packaging==24.0; python_version >= '3.7'",
"platformdirs==4.2.2; python_version >= '3.8'",
"pooch==1.8.1; python_version >= '3.7'",
"psutil==5.9.8; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"pycparser==2.22; python_version >= '3.8'",
"python-dotenv==1.0.1; python_version >= '3.8'",
"pyyaml==6.0.1; python_version >= '3.6'",
"regex==2024.5.15; python_version >= '3.8'",
"requests==2.32.2; python_version >= '3.8'",
"retrying==1.3.4",
"safetensors==0.4.3; python_version >= '3.7'",
"scikit-learn==1.3.2; python_version >= '3.8'",
"scipy==1.13.0; python_version >= '3.9'",
"six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"soundfile==0.12.1",
"soxr==0.3.7; python_version >= '3.6'",
"stable-ts==2.13.3; python_version >= '3.8'",
"submitit==1.5.1; python_version >= '3.8'",
"sympy==1.12; python_version >= '3.8'",
"threadpoolctl==3.5.0; python_version >= '3.8'",
"tiktoken==0.7.0; python_version >= '3.8'",
"tokenizers==0.19.1; python_version >= '3.7'",
"torch==2.2.2; python_full_version >= '3.8.0'",
"torchaudio==2.2.2",
"tqdm==4.66.1; python_version >= '3.7'",
"transformers==4.41.1; python_full_version >= '3.8.0'",
"treetable==0.2.5; python_full_version >= '3.6.0'",
"typing-extensions==4.8.0; python_version >= '3.8'",
"urllib3==2.2.1; python_version >= '3.8'",
]

[project.scripts]
ser = "ser.__main__:main"

# Development-only dependencies; the CI workflow installs them with `uv sync --dev`.
[tool.uv]
dev-dependencies = [
"pytest>=8.2",
"pytest-cov>=5.0",
]

# pytest defaults: "-ra" prints a short summary line for every test outcome
# except passes; tests are collected from tests/ when no path is given.
[tool.pytest.ini_options]
addopts = "-ra"
testpaths = ["tests"]

64 changes: 0 additions & 64 deletions requirements.txt

This file was deleted.

75 changes: 70 additions & 5 deletions ser/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
"""

import argparse
import logging
import sys
import time
import logging
from typing import List, Tuple
from pathlib import Path

from ser.models.emotion_model import predict_emotions, train_model
from ser.transcript import extract_transcript
Expand All @@ -31,6 +31,7 @@
print_timeline,
save_timeline_to_csv,
)
from ser.utils.subtitles import SubtitleGenerator, FORMATTERS, timeline_to_subtitles
from ser.config import Config


Expand Down Expand Up @@ -65,6 +66,22 @@ def main() -> None:
action="store_true",
help="Save the transcript to a CSV file",
)
parser.add_argument(
"--subtitle-format",
choices=tuple(FORMATTERS.keys()),
help=(
"Export the generated timeline as subtitles in the chosen format. "
"If omitted, the format is inferred from --subtitle-output when possible."
),
)
parser.add_argument(
"--subtitle-output",
type=str,
help=(
"File path for the exported subtitle file. The format is inferred from "
"the extension when --subtitle-format is not provided."
),
)
args: argparse.Namespace = parser.parse_args()

if args.train:
Expand All @@ -82,13 +99,56 @@ def main() -> None:

logger.info(msg="Starting emotion prediction...")
start_time = time.time()
emotions: List[Tuple[str, float, float]] = predict_emotions(args.file)
transcript: List[Tuple[str, float, float]] = extract_transcript(
emotions: list[tuple[str, float, float]] = predict_emotions(args.file)
transcript: list[tuple[str, float, float]] = extract_transcript(
args.file, args.language
)
timeline: list = build_timeline(transcript, emotions)
timeline: list[tuple[float, str, str]] = build_timeline(transcript, emotions)
print_timeline(timeline)

if args.subtitle_format or args.subtitle_output:
if not args.subtitle_output:
logger.error(
msg="--subtitle-output is required to export subtitles.",
)
sys.exit(1)

subtitle_format: str | None = args.subtitle_format
if not subtitle_format:
subtitle_format = _infer_subtitle_format(args.subtitle_output)
if not subtitle_format:
logger.error(
"Unable to infer subtitle format from %s. Provide --subtitle-format.",
args.subtitle_output,
)
sys.exit(1)
else:
inferred_format: str | None = _infer_subtitle_format(args.subtitle_output)
if inferred_format and inferred_format != subtitle_format:
logger.info(
"Using subtitle format %s (overriding inferred format %s from output path)",
subtitle_format,
inferred_format,
)

subtitles: list[tuple[float, float, str, str]] = timeline_to_subtitles(timeline)
if not subtitles:
logger.warning("Timeline did not produce any subtitle entries to export.")
else:
try:
generator = SubtitleGenerator(FORMATTERS[subtitle_format])
generator.generate_file(subtitles, args.subtitle_output)
logger.info(
"Subtitle file exported to %s",
args.subtitle_output,
)
except Exception as err:
logger.error(
msg=f"Failed to export subtitles: {err}",
exc_info=True,
)
sys.exit(1)

if args.save_transcript:
csv_file_name: str = save_timeline_to_csv(timeline, args.file)
logger.info(msg=f"Timeline saved to {csv_file_name}")
Expand All @@ -98,5 +158,10 @@ def main() -> None:
)


def _infer_subtitle_format(output_path: str) -> "str | None":
    """Infer the subtitle format from the file extension of ``output_path``.

    The final suffix of the path is lower-cased, stripped of its leading dot
    and looked up in ``FORMATTERS``.

    Args:
        output_path: Destination path of the subtitle file.

    Returns:
        The extension when it is a key of ``FORMATTERS``, otherwise ``None``.
    """
    # The return annotation is quoted on purpose: an unquoted ``str | None``
    # is evaluated when the function is defined and raises ``TypeError`` on
    # Python 3.9, which this project still supports (requires-python >= 3.9).
    suffix: str = Path(output_path).suffix.lower().lstrip(".")
    return suffix if suffix in FORMATTERS else None


if __name__ == "__main__":
main()
Loading
Loading