From d070e0566ef42278c62c7a5d45fc0e65dd0456e9 Mon Sep 17 00:00:00 2001 From: Eric Bower <31257558+ebower42@users.noreply.github.com> Date: Mon, 27 Oct 2025 14:35:03 -0700 Subject: [PATCH] Make ElevenLabs Optional (#14) * Make ElevenLabs Optional * Added PIPER_VOICE backwards compatibility * Remove parens * Refactor ElevenLabsAPI to use CHARLES_VOICE_ID and update character count logic * Remove MAX_ELEVEN_LABS_CHARACTERS from user settings * Update default voice ID to use ELEVEN_LABS_VOICE_ID in get_spoken_name method * Add comment to ELEVEN_LABS_VOICE_ID for clarity on voice assignment --- .dockerignore | 1 - Dockerfile | 2 +- README.md | 2 +- docker-entrypoint.sh | 6 +++--- src/bot.py | 21 ++++++--------------- src/eleven_labs_api.py | 18 +++++++----------- src/generator.py | 41 +++++++++++++++++++++++++++-------------- src/user_settings.py | 8 ++++++-- 8 files changed, 51 insertions(+), 48 deletions(-) diff --git a/.dockerignore b/.dockerignore index aa3a994..1a306a8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -27,5 +27,4 @@ ENV/ # Misc *.swp -audio/ image/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index d714dda..4cf381a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV APP_ROOT=/app ENV PIPER_VOICES_DIR=/voices ENV LOG_DIR=/log ENV AUDIO_DIR=/audio -ENV PIPER_VOICE=en_GB-alan-medium +ENV PIPER_VOICE_ID=en_GB-alan-medium ENV PYTHONUNBUFFERED=1 ARG IMAGE_VERSION diff --git a/README.md b/README.md index 0b27b50..4f00aad 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Install the required packages In order to run the bot properly, you must have a number of environment variables specified: - `AUDIO_DIR` - The directory to put generated audio files -- `PIPER_VOICE` - The voice id of the piper voice, defaults to `en_GB-alan-medium` +- `PIPER_VOICE_ID` - The voice id of the piper voice, defaults to `en_GB-alan-medium` - `PIPER_VOICES_DIR` - The directory containing the piper voice onnx and json files - `FFMPEG_EXEC` - The path to the ffmpeg executable. You only need to specify this if `ffmpeg` is not part of your system path. diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 460b446..e4dfa6b 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -2,9 +2,9 @@ set -euo pipefail : "${PIPER_VOICES_DIR:=/voices}" -: "${PIPER_VOICE:=en_GB-alan-medium}" +: "${PIPER_VOICE_ID:=en_GB-alan-medium}" -PIPER_VOICE_FILE="$PIPER_VOICES_DIR/$PIPER_VOICE.onnx" +PIPER_VOICE_FILE="$PIPER_VOICES_DIR/$PIPER_VOICE_ID.onnx" echo "[entrypoint] Voices dir: $PIPER_VOICES_DIR" @@ -13,7 +13,7 @@ if [[ ! -f "$PIPER_VOICE_FILE" ]]; then python -m piper.download_voices \ --debug \ --download-dir "$PIPER_VOICES_DIR" \ - "$PIPER_VOICE" + "$PIPER_VOICE_ID" else echo "[entrypoint] Voice found at $PIPER_VOICE_FILE" fi diff --git a/src/bot.py b/src/bot.py index 4487ca2..5811eef 100644 --- a/src/bot.py +++ b/src/bot.py @@ -3,10 +3,8 @@ from discord.ext import commands from typing import Any, Optional import logging -from pathlib import Path -from eleven_labs_api import ElevenLabsAPI import asyncio -from user_settings import BOT_TOKEN, ELEVEN_LABS_TOKEN, AUDIO_DIR, FFMPEG_EXEC, AUTO_VOICE_LEAVE_DELAY, DEBUG +from user_settings import BOT_TOKEN, ELEVEN_LABS_TOKEN, FFMPEG_EXEC, AUTO_VOICE_LEAVE_DELAY, DEBUG RAW_PREFIX = "!batch" RAW_PREFIX_SHORT = "!b" @@ -22,8 +20,7 @@ intents.messages = True intents.voice_states = True -name_api = Generator() -eleven_labs_api = ElevenLabsAPI(ELEVEN_LABS_TOKEN) +name_api = Generator(eleven_labs_api_token=ELEVEN_LABS_TOKEN) g_last_name = "Benedict Cumberbatch" g_last_phone = "benedict cumberbatch" @@ -70,7 +67,6 @@ async def schedule_voice_leave(guild: discord.Guild) -> None: if task := voice_leave_tasks.pop(guild.id, None): task.cancel() - async def _worker(): try: await asyncio.sleep(AUTO_VOICE_LEAVE_DELAY) @@ -95,24 +91,19 @@ def cancel_voice_leave(guild: discord.Guild, reason: Optional[str] = None) -> No async def _speak(ctx: commands.Context) -> Optional[Any]: - global g_autospeak vc = vc_for(ctx.guild) if not vc or not vc.is_connected(): return await ctx.reply("I am not connected to a voice channel.", mention_author=False) if not vc.is_playing(): - _count = eleven_labs_api.get_remaining_character_count() - if _count < 20 or DEBUG: - name_api.vocalize(g_last_phone) - audio_source = Path(AUDIO_DIR) / "output.wav" - else: - audio_source = eleven_labs_api.get_spoken_name(g_last_name, AUDIO_DIR) + audio_source = name_api.speak(g_last_name, g_last_phone) return vc.play(discord.FFmpegPCMAudio(executable=FFMPEG_EXEC, source=str(audio_source))) else: return await ctx.reply("Audio is already playing.", mention_author=False) + async def _gen(ctx: commands.Context): global g_last_name, g_last_phone, g_autospeak - name, phone = name_api.name() + name, phone = name_api.new_name() g_last_name = name g_last_phone = phone await ctx.reply(name) @@ -247,7 +238,7 @@ async def autospeak(ctx: commands.Context, subcmd: str = "on"): @bot.command(name="count", hidden=True) async def count(ctx: commands.Context): - cnt = eleven_labs_api.get_remaining_character_count() + cnt = name_api.get_remaining_eleven_labs_character_count() return await ctx.reply(f"{cnt} characters") diff --git a/src/eleven_labs_api.py b/src/eleven_labs_api.py index aab038b..3fc6d37 100644 --- a/src/eleven_labs_api.py +++ b/src/eleven_labs_api.py @@ -3,25 +3,22 @@ from dataclasses import dataclass from pathlib import Path from typing import Union +from user_settings import ELEVEN_LABS_VOICE_ID MODEL_ID = "eleven_turbo_v2_5" OUTPUT_FORMAT = "mp3_44100_128" -MAX_CHARACTERS = 10000 -class ElevenLabsAPI: - - @dataclass - class VoiceIDs: - Clyde = "wyWA56cQNU2KqUW4eCsI" - Charles = "zNsotODqUhvbJ5wMG7Ei" +class ElevenLabsAPI: def __init__(self, token: str): self.client = ElevenLabs(api_key=token) self.character_count = 0 + self.character_limit = 0 + self.remaining_character_count = 0 self.update_character_count() def get_spoken_name(self, name: str, audio_dir: Union[Path, str], - voice_id: str = VoiceIDs.Charles, speed: float = 1.0, + voice_id: str = ELEVEN_LABS_VOICE_ID, speed: float = 1.0, regen: bool = False) -> Path: name_id = name.replace(" ", "_") file = Path(audio_dir) / f"{name_id}.mp3" @@ -46,6 +43,5 @@ def get_spoken_name(self, name: str, audio_dir: Union[Path, str], def update_character_count(self): subscription = self.client.user.subscription.get() self.character_count = subscription.character_count - - def get_remaining_character_count(self): - return MAX_CHARACTERS - self.character_count \ No newline at end of file + self.character_limit = subscription.character_limit + self.remaining_character_count = self.character_limit - self.character_count \ No newline at end of file diff --git a/src/generator.py b/src/generator.py index 3e09bd1..6af7e6b 100644 --- a/src/generator.py +++ b/src/generator.py @@ -1,29 +1,34 @@ import random import json from pathlib import Path -from typing import Union +from typing import Union, Optional from piper import PiperVoice import wave -from user_settings import PIPER_VOICE, PIPER_VOICES_DIR, AUDIO_DIR +from user_settings import PIPER_VOICE_ID, PIPER_VOICES_DIR, AUDIO_DIR +from eleven_labs_api import ElevenLabsAPI PATH_TO_JSON = Path(__file__).parent / "phonemized_words.json" -VOICE_FILE = Path(PIPER_VOICES_DIR) / f"{PIPER_VOICE}.onnx" +VOICE_FILE = Path(PIPER_VOICES_DIR) / f"{PIPER_VOICE_ID}.onnx" VOICE = PiperVoice.load(VOICE_FILE) class Generator: - - def __init__(self, json_path: Union[Path, str]=PATH_TO_JSON): - with open(PATH_TO_JSON, 'r') as f: + def __init__(self, json_path: Union[Path, str]=PATH_TO_JSON, eleven_labs_api_token: Optional[str] = None): + with open(str(json_path), 'r') as f: word_list = json.load(f) self.givenPart1_map = word_list.get("givenPart1", {"Bene": "bene"}) self.givenPart2_map = word_list.get("givenPart2", {"dict": "dict"}) self.surnamePart1_map = word_list.get("surnamePart1", {"Cumber": "cumber"}) self.surnamePart2_map = word_list.get("surnamePart2", {"batch": "batch"}) + + if eleven_labs_api_token is not None: + self.eleven_labs_api = ElevenLabsAPI(eleven_labs_api_token) + else: + self.eleven_labs_api = None return - def name(self): + def new_name(self): first_part_1 = random.choice(list(self.givenPart1_map.keys())) first_part_2 = random.choice(list(self.givenPart2_map.keys())) last_part_1 = random.choice(list(self.surnamePart1_map.keys())) @@ -37,12 +42,20 @@ def name(self): phone = first_phone_part_1 + first_phone_part_2 + " " + last_phone_part_1 + last_phone_part_2 return first.capitalize() + " " + last.capitalize(), phone - @staticmethod - def vocalize(phone): - phone = f"[[ {phone} ]]" - wav_file = Path(AUDIO_DIR) / f"output.wav" - with wave.open(str(wav_file), 'wb') as output: - VOICE.synthesize_wav(phone, output) + def speak(self, name: str, phone: Optional[str] = None) -> Path: + cnt = 0 if self.eleven_labs_api is None else self.eleven_labs_api.remaining_character_count + if cnt < 20: + phone = f"[[ {phone} ]]" if phone else name + audio_file = Path(AUDIO_DIR) / f"output.wav" + with wave.open(str(audio_file), 'wb') as output: + VOICE.synthesize_wav(phone, output) + else: + audio_file = self.eleven_labs_api.get_spoken_name(name, AUDIO_DIR) + + return audio_file + + def get_remaining_eleven_labs_character_count(self): + return 0 if self.eleven_labs_api is None else self.eleven_labs_api.remaining_character_count def main(): @@ -50,7 +63,7 @@ def main(): p = Path('~', 'Piper TTS', 'names.txt').expanduser() with open(p, 'w') as f: for _ in range(100): - f.write(gen.name()[0] + '.\n') + f.write(gen.new_name()[0] + '.\n') if __name__ == "__main__": main() \ No newline at end of file diff --git a/src/user_settings.py b/src/user_settings.py index 59d67ad..e1033c0 100644 --- a/src/user_settings.py +++ b/src/user_settings.py @@ -9,11 +9,15 @@ FFMPEG_EXEC = os.getenv("FFMPEG_EXEC", "ffmpeg") # Piper -PIPER_VOICE = os.getenv("PIPER_VOICE", "en_GB-alan-medium") +_LEGACY_PIPER_VOICE = os.getenv("PIPER_VOICE", "en_GB-alan-medium") +PIPER_VOICE_ID = os.getenv("PIPER_VOICE_ID", _LEGACY_PIPER_VOICE) PIPER_VOICES_DIR = os.getenv("PIPER_VOICES_DIR", "/voices") +# ElevenLabs +ELEVEN_LABS_VOICE_ID = os.getenv("ELEVEN_LABS_VOICE_ID", "zNsotODqUhvbJ5wMG7Ei") # Voice ID for "Charles" + # Discord -AUTO_VOICE_LEAVE_DELAY = os.getenv("AUTO_VOICE_LEAVE_DELAY", 20) +AUTO_VOICE_LEAVE_DELAY = int(os.getenv("AUTO_VOICE_LEAVE_DELAY", 20)) # Dev DEBUG = os.getenv("DEBUG")