From d070e0566ef42278c62c7a5d45fc0e65dd0456e9 Mon Sep 17 00:00:00 2001
From: Eric Bower <31257558+ebower42@users.noreply.github.com>
Date: Mon, 27 Oct 2025 14:35:03 -0700
Subject: [PATCH] Make ElevenLabs Optional (#14)

* Make ElevenLabs Optional

* Added PIPER_VOICE backwards compatibility

* Remove parens

* Refactor ElevenLabsAPI to use CHARLES_VOICE_ID and update character count logic

* Remove MAX_ELEVEN_LABS_CHARACTERS from user settings

* Update default voice ID to use ELEVEN_LABS_VOICE_ID in get_spoken_name method

* Add comment to ELEVEN_LABS_VOICE_ID for clarity on voice assignment
---
 .dockerignore          |  1 -
 Dockerfile             |  2 +-
 README.md              |  2 +-
 docker-entrypoint.sh   |  6 +++---
 src/bot.py             | 21 ++++++---------------
 src/eleven_labs_api.py | 18 +++++++-----------
 src/generator.py       | 41 +++++++++++++++++++++++++++--------------
 src/user_settings.py   |  8 ++++++--
 8 files changed, 51 insertions(+), 48 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index aa3a994..1a306a8 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -27,5 +27,4 @@ ENV/
 
 # Misc
 *.swp
-audio/
 image/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index d714dda..4cf381a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,7 +7,7 @@ ENV APP_ROOT=/app
 ENV PIPER_VOICES_DIR=/voices
 ENV LOG_DIR=/log
 ENV AUDIO_DIR=/audio
-ENV PIPER_VOICE=en_GB-alan-medium
+ENV PIPER_VOICE_ID=en_GB-alan-medium
 ENV PYTHONUNBUFFERED=1
 
 ARG IMAGE_VERSION
diff --git a/README.md b/README.md
index 0b27b50..4f00aad 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Install the required packages
 
 In order to run the bot properly, you must have a number of environment variables specified:
 - `AUDIO_DIR` - The directory to put generated audio files
-- `PIPER_VOICE` - The voice id of the piper voice, defaults to `en_GB-alan-medium`
+- `PIPER_VOICE_ID` - The ID of the Piper voice to use; defaults to `en_GB-alan-medium` (the legacy `PIPER_VOICE` variable is still read as a fallback when this one is unset)
 - `PIPER_VOICES_DIR` - The directory containing the piper voice onnx and json files
 - `FFMPEG_EXEC` - The path to the ffmpeg executable. You only need to specify this if `ffmpeg` is not part of your 
   system path.
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 460b446..e4dfa6b 100644
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -2,9 +2,9 @@
 set -euo pipefail
 
 : "${PIPER_VOICES_DIR:=/voices}"
-: "${PIPER_VOICE:=en_GB-alan-medium}"
+: "${PIPER_VOICE_ID:=en_GB-alan-medium}"
 
-PIPER_VOICE_FILE="$PIPER_VOICES_DIR/$PIPER_VOICE.onnx"
+PIPER_VOICE_FILE="$PIPER_VOICES_DIR/$PIPER_VOICE_ID.onnx"
 
 echo "[entrypoint] Voices dir: $PIPER_VOICES_DIR"
 
@@ -13,7 +13,7 @@ if [[ ! -f "$PIPER_VOICE_FILE" ]]; then
   python -m piper.download_voices \
     --debug \
     --download-dir "$PIPER_VOICES_DIR" \
-    "$PIPER_VOICE"
+    "$PIPER_VOICE_ID"
 else
   echo "[entrypoint] Voice found at $PIPER_VOICE_FILE"
 fi
diff --git a/src/bot.py b/src/bot.py
index 4487ca2..5811eef 100644
--- a/src/bot.py
+++ b/src/bot.py
@@ -3,10 +3,8 @@
 from discord.ext import commands
 from typing import Any, Optional
 import logging
-from pathlib import Path
-from eleven_labs_api import ElevenLabsAPI
 import asyncio
-from user_settings import BOT_TOKEN, ELEVEN_LABS_TOKEN, AUDIO_DIR, FFMPEG_EXEC, AUTO_VOICE_LEAVE_DELAY, DEBUG
+from user_settings import BOT_TOKEN, ELEVEN_LABS_TOKEN, FFMPEG_EXEC, AUTO_VOICE_LEAVE_DELAY, DEBUG
 
 RAW_PREFIX = "!batch"
 RAW_PREFIX_SHORT = "!b"
@@ -22,8 +20,7 @@
 intents.messages = True
 intents.voice_states = True
 
-name_api = Generator()
-eleven_labs_api = ElevenLabsAPI(ELEVEN_LABS_TOKEN)
+name_api = Generator(eleven_labs_api_token=ELEVEN_LABS_TOKEN)
 
 g_last_name = "Benedict Cumberbatch"
 g_last_phone = "benedict cumberbatch"
@@ -70,7 +67,6 @@ async def schedule_voice_leave(guild: discord.Guild) -> None:
     if task := voice_leave_tasks.pop(guild.id, None):
         task.cancel()
 
-
     async def _worker():
         try:
             await asyncio.sleep(AUTO_VOICE_LEAVE_DELAY)
@@ -95,24 +91,19 @@ def cancel_voice_leave(guild: discord.Guild, reason: Optional[str] = None) -> No
 
 
 async def _speak(ctx: commands.Context) -> Optional[Any]:
-    global g_autospeak
     vc = vc_for(ctx.guild)
     if not vc or not vc.is_connected():
         return await ctx.reply("I am not connected to a voice channel.", mention_author=False)
     if not vc.is_playing():
-        _count = eleven_labs_api.get_remaining_character_count()
-        if _count < 20 or DEBUG:
-            name_api.vocalize(g_last_phone)
-            audio_source = Path(AUDIO_DIR) / "output.wav"
-        else:
-            audio_source = eleven_labs_api.get_spoken_name(g_last_name, AUDIO_DIR)
+        audio_source = name_api.speak(g_last_name, g_last_phone)
         return vc.play(discord.FFmpegPCMAudio(executable=FFMPEG_EXEC, source=str(audio_source)))
     else:
         return await ctx.reply("Audio is already playing.", mention_author=False)
 
+
 async def _gen(ctx: commands.Context):
     global g_last_name, g_last_phone, g_autospeak
-    name, phone = name_api.name()
+    name, phone = name_api.new_name()
     g_last_name = name
     g_last_phone = phone
     await ctx.reply(name)
@@ -247,7 +238,7 @@ async def autospeak(ctx: commands.Context, subcmd: str = "on"):
 @bot.command(name="count",
              hidden=True)
 async def count(ctx: commands.Context):
-    cnt = eleven_labs_api.get_remaining_character_count()
+    cnt = name_api.get_remaining_eleven_labs_character_count()
     return await ctx.reply(f"{cnt} characters")
 
 
diff --git a/src/eleven_labs_api.py b/src/eleven_labs_api.py
index aab038b..3fc6d37 100644
--- a/src/eleven_labs_api.py
+++ b/src/eleven_labs_api.py
@@ -3,25 +3,22 @@
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Union
+from user_settings import ELEVEN_LABS_VOICE_ID
 
 MODEL_ID = "eleven_turbo_v2_5"
 OUTPUT_FORMAT = "mp3_44100_128"
-MAX_CHARACTERS = 10000
 
-class ElevenLabsAPI:
-
-    @dataclass
-    class VoiceIDs:
-        Clyde = "wyWA56cQNU2KqUW4eCsI"
-        Charles = "zNsotODqUhvbJ5wMG7Ei"
 
+class ElevenLabsAPI:
     def __init__(self, token: str):
         self.client = ElevenLabs(api_key=token)
         self.character_count = 0
+        self.character_limit = 0
+        self.remaining_character_count = 0
         self.update_character_count()
 
     def get_spoken_name(self, name: str, audio_dir: Union[Path, str],
-                        voice_id: str = VoiceIDs.Charles, speed: float = 1.0,
+                        voice_id: str = ELEVEN_LABS_VOICE_ID, speed: float = 1.0,
                         regen: bool = False) -> Path:
         name_id = name.replace(" ", "_")
         file = Path(audio_dir) / f"{name_id}.mp3"
@@ -46,6 +43,5 @@ def get_spoken_name(self, name: str, audio_dir: Union[Path, str],
     def update_character_count(self):
         subscription = self.client.user.subscription.get()
         self.character_count = subscription.character_count
-
-    def get_remaining_character_count(self):
-        return MAX_CHARACTERS - self.character_count
\ No newline at end of file
+        self.character_limit = subscription.character_limit
+        self.remaining_character_count = self.character_limit - self.character_count
\ No newline at end of file
diff --git a/src/generator.py b/src/generator.py
index 3e09bd1..6af7e6b 100644
--- a/src/generator.py
+++ b/src/generator.py
@@ -1,29 +1,34 @@
 import random
 import json
 from pathlib import Path
-from typing import Union
+from typing import Union, Optional
 from piper import PiperVoice
 import wave
-from user_settings import PIPER_VOICE, PIPER_VOICES_DIR, AUDIO_DIR
+from user_settings import PIPER_VOICE_ID, PIPER_VOICES_DIR, AUDIO_DIR
+from eleven_labs_api import ElevenLabsAPI
 
 PATH_TO_JSON = Path(__file__).parent / "phonemized_words.json"
-VOICE_FILE = Path(PIPER_VOICES_DIR) / f"{PIPER_VOICE}.onnx"
+VOICE_FILE = Path(PIPER_VOICES_DIR) / f"{PIPER_VOICE_ID}.onnx"
 VOICE = PiperVoice.load(VOICE_FILE)
 
 
 class Generator:
-
-    def __init__(self, json_path: Union[Path, str]=PATH_TO_JSON):
-        with open(PATH_TO_JSON, 'r') as f:
+    def __init__(self, json_path: Union[Path, str]=PATH_TO_JSON, eleven_labs_api_token: Optional[str] = None):
+        with open(str(json_path), 'r') as f:
           word_list = json.load(f)
 
         self.givenPart1_map = word_list.get("givenPart1", {"Bene": "bene"})
         self.givenPart2_map = word_list.get("givenPart2", {"dict": "dict"})
         self.surnamePart1_map = word_list.get("surnamePart1", {"Cumber": "cumber"})
         self.surnamePart2_map = word_list.get("surnamePart2", {"batch": "batch"})
+
+        if eleven_labs_api_token is not None:
+            self.eleven_labs_api = ElevenLabsAPI(eleven_labs_api_token)
+        else:
+            self.eleven_labs_api = None
         return
   
-    def name(self):
+    def new_name(self):
         first_part_1 = random.choice(list(self.givenPart1_map.keys()))
         first_part_2 = random.choice(list(self.givenPart2_map.keys()))
         last_part_1 = random.choice(list(self.surnamePart1_map.keys()))
@@ -37,12 +42,20 @@ def name(self):
         phone = first_phone_part_1 + first_phone_part_2 + " " + last_phone_part_1 + last_phone_part_2
         return first.capitalize() + " " + last.capitalize(), phone
 
-    @staticmethod
-    def vocalize(phone):
-        phone = f"[[ {phone} ]]"
-        wav_file = Path(AUDIO_DIR) / f"output.wav"
-        with wave.open(str(wav_file), 'wb') as output:
-            VOICE.synthesize_wav(phone, output)
+    def speak(self, name: str, phone: Optional[str] = None) -> Path:
+        cnt = 0 if self.eleven_labs_api is None else self.eleven_labs_api.remaining_character_count
+        if cnt < 20:
+            phone = f"[[ {phone} ]]" if phone else name
+            audio_file = Path(AUDIO_DIR) / f"output.wav"
+            with wave.open(str(audio_file), 'wb') as output:
+                VOICE.synthesize_wav(phone, output)
+        else:
+            audio_file = self.eleven_labs_api.get_spoken_name(name, AUDIO_DIR)
+
+        return audio_file
+
+    def get_remaining_eleven_labs_character_count(self):
+        return 0 if self.eleven_labs_api is None else self.eleven_labs_api.remaining_character_count
 
 
 def main():
@@ -50,7 +63,7 @@ def main():
     p = Path('~', 'Piper TTS', 'names.txt').expanduser()
     with open(p, 'w') as f:
         for _ in range(100):
-            f.write(gen.name()[0] + '.\n')
+            f.write(gen.new_name()[0] + '.\n')
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/src/user_settings.py b/src/user_settings.py
index 59d67ad..e1033c0 100644
--- a/src/user_settings.py
+++ b/src/user_settings.py
@@ -9,11 +9,15 @@
 FFMPEG_EXEC = os.getenv("FFMPEG_EXEC", "ffmpeg")
 
 # Piper
-PIPER_VOICE = os.getenv("PIPER_VOICE", "en_GB-alan-medium")
+_LEGACY_PIPER_VOICE = os.getenv("PIPER_VOICE", "en_GB-alan-medium")
+PIPER_VOICE_ID = os.getenv("PIPER_VOICE_ID", _LEGACY_PIPER_VOICE)
 PIPER_VOICES_DIR = os.getenv("PIPER_VOICES_DIR", "/voices")
 
+# ElevenLabs
+ELEVEN_LABS_VOICE_ID = os.getenv("ELEVEN_LABS_VOICE_ID", "zNsotODqUhvbJ5wMG7Ei") # Voice ID for "Charles"
+
 # Discord
-AUTO_VOICE_LEAVE_DELAY = os.getenv("AUTO_VOICE_LEAVE_DELAY", 20)
+AUTO_VOICE_LEAVE_DELAY = int(os.getenv("AUTO_VOICE_LEAVE_DELAY", 20))
 
 # Dev
 DEBUG = os.getenv("DEBUG")