Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions examples/test-upliftai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Example usage of the UpliftAI engine."""

import os

from tts_wrapper import UpliftAIClient


def main() -> None:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0 | Confidence: High

The example calls speak_streamed(), which UpliftAIClient does not implement itself. If the base class does not provide it, the call raises an AttributeError at runtime. The base class likely does provide this method, but it then depends on a proper implementation of synth_to_bytestream(). Either way, the example creates a false expectation about available functionality.

Code Suggestion:

# Replace with actual streaming usage pattern
for chunk in client.synth_to_bytestream(text):
    # Process audio chunks
    pass

api_key = os.getenv("UPLIFTAI_KEY")
if not api_key:
raise RuntimeError("UPLIFTAI_KEY environment variable is not set")

client = UpliftAIClient(api_key=api_key)
text = "Testing the UpliftAI text to speech engine"
client.speak_streamed(text)


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions tests/load_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"elevenlabs": ["ELEVENLABS_API_KEY"],
"witai": ["WITAI_TOKEN"],
"playht": ["PLAYHT_API_KEY", "PLAYHT_USER_ID"],
"upliftai": ["UPLIFTAI_KEY"],
}


Expand Down
5 changes: 5 additions & 0 deletions tests/test_generate_audiofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
GoogleClient,
GoogleTransClient,
MicrosoftClient,
UpliftAIClient,
PollyClient,
SherpaOnnxClient,
WatsonClient,
Expand Down Expand Up @@ -48,6 +49,10 @@
"client": WitAiClient,
"credential_keys": ["WITAI_TOKEN"],
},
"upliftai": {
"client": UpliftAIClient,
"credential_keys": ["UPLIFTAI_KEY"],
},
"googletrans": {
"client_lambda": lambda: GoogleTransClient("en-co.uk"),
},
Expand Down
6 changes: 6 additions & 0 deletions tests/test_tts_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
PlayHTClient,
PollyClient,
SherpaOnnxClient,
UpliftAIClient,
WatsonClient,
WitAiClient,
eSpeakClient,
Expand All @@ -37,6 +38,7 @@
"espeak": eSpeakClient,
"playht": PlayHTClient,
"openai": OpenAIClient,
"upliftai": UpliftAIClient,
}

# Add AVSynth only on macOS
Expand Down Expand Up @@ -126,6 +128,8 @@ def check_credentials(service):
f"ElevenLabs API key: {elevenlabs_api_key[:5]}...{elevenlabs_api_key[-5:] if elevenlabs_api_key else ''}"
)
client = ElevenLabsClient(credentials=elevenlabs_api_key)
elif service == "upliftai":
client = UpliftAIClient(api_key=os.getenv("UPLIFTAI_KEY"))
elif service == "witai":
client = WitAiClient(credentials=os.getenv("WITAI_API_KEY"))
elif service == "googletrans":
Expand Down Expand Up @@ -199,6 +203,8 @@ def create_tts_client(service):
return PlayHTClient(
credentials=(os.getenv("PLAYHT_API_KEY"), os.getenv("PLAYHT_USER_ID"))
)
if service == "upliftai":
return UpliftAIClient(api_key=os.getenv("UPLIFTAI_KEY"))
if service == "avsynth" and sys.platform == "darwin":
return AVSynthClient()
if service == "openai":
Expand Down
1 change: 1 addition & 0 deletions tts_wrapper/engines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .googletrans import *
from .microsoft import *
from .openai import *
from .upliftai import *
from .pico import *
from .playht import *
from .polly import *
Expand Down
5 changes: 5 additions & 0 deletions tts_wrapper/engines/upliftai/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""UpliftAI TTS engine for tts-wrapper."""

from .client import UpliftAIClient

__all__ = ["UpliftAIClient"]
166 changes: 166 additions & 0 deletions tts_wrapper/engines/upliftai/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable

import requests

from tts_wrapper.tts import AbstractTTS

if TYPE_CHECKING:
from collections.abc import Generator


logger = logging.getLogger(__name__)


class UpliftAIClient(AbstractTTS):
"""Client for the UpliftAI text-to-speech API."""

BASE_URL = "https://api.upliftai.org/v1/synthesis/text-to-speech"
STREAM_URL = f"{BASE_URL}/stream"
DEFAULT_VOICE = "v_8eelc901" # Info/Education Urdu

def __init__(self, api_key: str | None = None) -> None:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 | Confidence: High

The API key is used directly as the Authorization header value without a standard scheme prefix (e.g., "Bearer"). This may not comply with the UpliftAI API requirements (documentation not provided in PR) and could cause authentication failures. Most modern APIs expect Bearer token format. Additionally, storing the raw key in instance state increases exposure risk if the object is serialized or logged.

Suggested change
def __init__(self, api_key: str | None = None) -> None:
def __init__(self, api_key: str | None = None) -> None:
super().__init__()
self.api_key = api_key or os.getenv("UPLIFTAI_KEY")
if not self.api_key:
msg = "UpliftAI API key is required. Set UPLIFTAI_KEY or pass api_key."
raise ValueError(msg)
self.headers = {
"Authorization": f"Bearer {self.api_key}", # Add scheme prefix
"Content-Type": "application/json"
}

super().__init__()
self.api_key = api_key or os.getenv("UPLIFTAI_KEY")
if not self.api_key:
msg = "UpliftAI API key is required. Set UPLIFTAI_KEY or pass api_key."
raise ValueError(msg)

self.headers = {"Authorization": self.api_key, "Content-Type": "application/json"}
self.audio_rate = 22050
self.voice_id = self.DEFAULT_VOICE

def set_voice(self, voice_id: str, lang: str | None = None) -> None:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 | Confidence: Medium

The method sets a lang attribute that isn't used in synthesis methods, creating inconsistent state. The voice ID already encapsulates language information (as seen in hardcoded voices). This redundant parameter may confuse developers and could become out-of-sync with actual voice capabilities.

Suggested change
def set_voice(self, voice_id: str, lang: str | None = None) -> None:
def set_voice(self, voice_id: str) -> None:
"""Set the voice for synthesis."""
self.voice_id = voice_id

"""Set the voice for synthesis."""
self.voice_id = voice_id
if lang:
self.lang = lang

def synth_to_bytes(self, text: Any, voice_id: str | None = None) -> bytes:
    """Return the audio for ``text`` from the non-streaming endpoint.

    The voice is resolved in order: the ``voice_id`` argument, then the
    instance's ``voice_id``, then ``DEFAULT_VOICE``. ``text`` is converted
    with ``str()`` before it is sent, and the request asks for the
    ``WAV_22050_16`` output format.

    Returns:
        The raw response body.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` when the
            API answers with an error status.
    """
    request_body = {
        "voiceId": voice_id or self.voice_id or self.DEFAULT_VOICE,
        "text": str(text),
        "outputFormat": "WAV_22050_16",
    }
    reply = requests.post(
        self.BASE_URL,
        json=request_body,
        headers=self.headers,
        timeout=30,
    )
    # Surface HTTP error statuses as exceptions instead of returning an error body.
    reply.raise_for_status()
    return reply.content

def synth_to_bytestream(
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 | Confidence: High

The streaming implementation does not verify the response Content-Type before yielding chunks. If the API returns an error (e.g., 200 with JSON error message due to API gateway misconfiguration), the client will treat JSON as audio data, potentially exposing sensitive error information or causing downstream processing errors. This violates the principle of defensive programming and could lead to security incidents if error messages contain internal API details.

Suggested change
def synth_to_bytestream(
with requests.post(...) as response:
response.raise_for_status()
# Verify we're receiving audio data
content_type = response.headers.get('Content-Type', '')
if not content_type.startswith('audio/'):
raise ValueError(f"Unexpected Content-Type: {content_type}")
for chunk in response.iter_content(chunk_size=4096):
if chunk:
yield chunk

self, text: Any, voice_id: str | None = None
) -> Generator[bytes, None, None]:
"""Stream synthesized audio chunks from the API."""
voice = voice_id or self.voice_id or self.DEFAULT_VOICE
payload = {
"voiceId": voice,
"text": str(text),
"outputFormat": "WAV_22050_16",
}
with requests.post(
self.STREAM_URL, json=payload, headers=self.headers, stream=True, timeout=30
) as response:
response.raise_for_status()
for chunk in response.iter_content(chunk_size=4096):
if chunk:
yield chunk

def synth(
    self,
    text: Any,
    output_file: str | Path,
    output_format: str = "wav",
    voice_id: str | None = None,
) -> None:
    """Synthesize text and write the resulting audio to ``output_file``.

    Args:
        text: Content to synthesize; forwarded to ``synth_to_bytes``.
        output_file: Destination path, created or overwritten in binary mode.
        output_format: Requested format. No conversion is performed here: the
            bytes written are exactly what ``synth_to_bytes`` returns, so any
            value other than ``"wav"`` only triggers a warning.
        voice_id: Optional voice override forwarded to ``synth_to_bytes``.
    """
    # Without this warning a non-WAV request would silently produce a file
    # whose contents do not match the format the caller asked for.
    if str(output_format).lower() != "wav":
        logger.warning(
            "UpliftAI synth ignores output_format=%r; writing unconverted audio bytes",
            output_format,
        )
    # Fetch the audio first so a failed request never leaves an empty file behind.
    audio_bytes = self.synth_to_bytes(text, voice_id)
    Path(output_file).write_bytes(audio_bytes)

def _get_voices(self) -> list[dict[str, Any]]:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 | Confidence: High

Hardcoded voice list creates a maintenance burden and synchronization risk with upstream API changes. The method is private (_get_voices) but there's no public interface to access voices. This limits discoverability of available voices and may lead to voice ID typos that aren't caught until API call time. Consider implementing a public get_voices() method with caching/refresh mechanism.

"""Return the list of available voices.

The UpliftAI service does not provide a voices endpoint,
so the voices are hardcoded.
"""
return [
{
"id": "v_meklc281",
"name": "Info/Education V2",
"gender": "neutral",
"language_codes": ["ur"],
},
{
"id": "v_8eelc901",
"name": "Info/Education",
"gender": "neutral",
"language_codes": ["ur"],
},
{
"id": "v_30s70t3a",
"name": "Nostalgic News",
"gender": "neutral",
"language_codes": ["ur"],
},
{
"id": "v_yypgzenx",
"name": "Dada Jee",
"gender": "male",
"language_codes": ["ur"],
},
{
"id": "v_kwmp7zxt",
"name": "Gen Z",
"gender": "neutral",
"language_codes": ["ur"],
},
{"id": "v_sd0kl3m9", "name": "Female", "gender": "female", "language_codes": ["sd"]},
{
"id": "v_sd6mn4p2",
"name": "Male Calm",
"gender": "male",
"language_codes": ["sd"],
},
{
"id": "v_sd9qr7x5",
"name": "Male News",
"gender": "male",
"language_codes": ["sd"],
},
{
"id": "v_bl0ab8c4",
"name": "Balochi Male",
"gender": "male",
"language_codes": ["bal"],
},
{
"id": "v_bl1de2f7",
"name": "Balochi Female",
"gender": "female",
"language_codes": ["bal"],
},
]

def check_credentials(self) -> bool: # pragma: no cover - network call
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 | Confidence: Medium

The credential check performs a full synthesis request ("ping" text), which is resource-intensive for both client and server. A lighter-weight endpoint (e.g., dedicated health check or simple GET request) would be more efficient. This could lead to unnecessary API costs and slower startup times if called frequently.

"""Verify that the API key works by making a small request."""
try:
payload = {
"voiceId": self.voice_id or self.DEFAULT_VOICE,
"text": "ping",
"outputFormat": "WAV_22050_16",
}
response = requests.post(self.BASE_URL, json=payload, headers=self.headers, timeout=10)
response.raise_for_status()
return True
except Exception:
logger.debug("UpliftAI credential check failed", exc_info=True)
return False

def connect(self, event_name: str, callback: Callable) -> None:
    """Register ``callback`` for ``event_name`` by delegating to the parent class."""
    super().connect(event_name, callback)
Loading
Loading