Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,4 @@ ENV/

# Misc
*.swp
audio/
image/
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ENV APP_ROOT=/app
ENV PIPER_VOICES_DIR=/voices
ENV LOG_DIR=/log
ENV AUDIO_DIR=/audio
ENV PIPER_VOICE=en_GB-alan-medium
ENV PIPER_VOICE_ID=en_GB-alan-medium
ENV PYTHONUNBUFFERED=1

ARG IMAGE_VERSION
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Install the required packages

In order to run the bot properly, you must have a number of environment variables specified:
- `AUDIO_DIR` - The directory to put generated audio files
- `PIPER_VOICE` - The voice id of the piper voice, defaults to `en_GB-alan-medium`
- `PIPER_VOICE_ID` - The voice id of the piper voice, defaults to `en_GB-alan-medium`
- `PIPER_VOICES_DIR` - The directory containing the piper voice onnx and json files
- `FFMPEG_EXEC` - The path to the ffmpeg executable. You only need to specify this if `ffmpeg` is not part of your
system path.
Expand Down
6 changes: 3 additions & 3 deletions docker-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
set -euo pipefail

: "${PIPER_VOICES_DIR:=/voices}"
: "${PIPER_VOICE:=en_GB-alan-medium}"
: "${PIPER_VOICE_ID:=en_GB-alan-medium}"

PIPER_VOICE_FILE="$PIPER_VOICES_DIR/$PIPER_VOICE.onnx"
PIPER_VOICE_FILE="$PIPER_VOICES_DIR/$PIPER_VOICE_ID.onnx"

echo "[entrypoint] Voices dir: $PIPER_VOICES_DIR"

Expand All @@ -13,7 +13,7 @@ if [[ ! -f "$PIPER_VOICE_FILE" ]]; then
python -m piper.download_voices \
--debug \
--download-dir "$PIPER_VOICES_DIR" \
"$PIPER_VOICE"
"$PIPER_VOICE_ID"
else
echo "[entrypoint] Voice found at $PIPER_VOICE_FILE"
fi
Expand Down
21 changes: 6 additions & 15 deletions src/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
from discord.ext import commands
from typing import Any, Optional
import logging
from pathlib import Path
from eleven_labs_api import ElevenLabsAPI
import asyncio
from user_settings import BOT_TOKEN, ELEVEN_LABS_TOKEN, AUDIO_DIR, FFMPEG_EXEC, AUTO_VOICE_LEAVE_DELAY, DEBUG
from user_settings import BOT_TOKEN, ELEVEN_LABS_TOKEN, FFMPEG_EXEC, AUTO_VOICE_LEAVE_DELAY, DEBUG

RAW_PREFIX = "!batch"
RAW_PREFIX_SHORT = "!b"
Expand All @@ -22,8 +20,7 @@
intents.messages = True
intents.voice_states = True

name_api = Generator()
eleven_labs_api = ElevenLabsAPI(ELEVEN_LABS_TOKEN)
name_api = Generator(eleven_labs_api_token=ELEVEN_LABS_TOKEN)

g_last_name = "Benedict Cumberbatch"
g_last_phone = "benedict cumberbatch"
Expand Down Expand Up @@ -70,7 +67,6 @@ async def schedule_voice_leave(guild: discord.Guild) -> None:
if task := voice_leave_tasks.pop(guild.id, None):
task.cancel()


async def _worker():
try:
await asyncio.sleep(AUTO_VOICE_LEAVE_DELAY)
Expand All @@ -95,24 +91,19 @@ def cancel_voice_leave(guild: discord.Guild, reason: Optional[str] = None) -> No


async def _speak(ctx: commands.Context) -> Optional[Any]:
global g_autospeak
vc = vc_for(ctx.guild)
if not vc or not vc.is_connected():
return await ctx.reply("I am not connected to a voice channel.", mention_author=False)
if not vc.is_playing():
_count = eleven_labs_api.get_remaining_character_count()
if _count < 20 or DEBUG:
name_api.vocalize(g_last_phone)
audio_source = Path(AUDIO_DIR) / "output.wav"
else:
audio_source = eleven_labs_api.get_spoken_name(g_last_name, AUDIO_DIR)
audio_source = name_api.speak(g_last_name, g_last_phone)
return vc.play(discord.FFmpegPCMAudio(executable=FFMPEG_EXEC, source=str(audio_source)))
else:
return await ctx.reply("Audio is already playing.", mention_author=False)


async def _gen(ctx: commands.Context):
global g_last_name, g_last_phone, g_autospeak
name, phone = name_api.name()
name, phone = name_api.new_name()
g_last_name = name
g_last_phone = phone
await ctx.reply(name)
Expand Down Expand Up @@ -247,7 +238,7 @@ async def autospeak(ctx: commands.Context, subcmd: str = "on"):
@bot.command(name="count",
hidden=True)
async def count(ctx: commands.Context):
cnt = eleven_labs_api.get_remaining_character_count()
cnt = name_api.get_remaining_eleven_labs_character_count()
return await ctx.reply(f"{cnt} characters")


Expand Down
18 changes: 7 additions & 11 deletions src/eleven_labs_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,22 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Union
from user_settings import ELEVEN_LABS_VOICE_ID

MODEL_ID = "eleven_turbo_v2_5"
OUTPUT_FORMAT = "mp3_44100_128"
MAX_CHARACTERS = 10000

class ElevenLabsAPI:

@dataclass
class VoiceIDs:
Clyde = "wyWA56cQNU2KqUW4eCsI"
Charles = "zNsotODqUhvbJ5wMG7Ei"

class ElevenLabsAPI:
def __init__(self, token: str):
self.client = ElevenLabs(api_key=token)
self.character_count = 0
self.character_limit = 0
self.remaining_character_count = 0
self.update_character_count()

def get_spoken_name(self, name: str, audio_dir: Union[Path, str],
voice_id: str = VoiceIDs.Charles, speed: float = 1.0,
voice_id: str = ELEVEN_LABS_VOICE_ID, speed: float = 1.0,
regen: bool = False) -> Path:
name_id = name.replace(" ", "_")
file = Path(audio_dir) / f"{name_id}.mp3"
Expand All @@ -46,6 +43,5 @@ def get_spoken_name(self, name: str, audio_dir: Union[Path, str],
def update_character_count(self):
subscription = self.client.user.subscription.get()
self.character_count = subscription.character_count

def get_remaining_character_count(self):
return MAX_CHARACTERS - self.character_count
self.character_limit = subscription.character_limit
self.remaining_character_count = self.character_limit - self.character_count
41 changes: 27 additions & 14 deletions src/generator.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
import random
import json
from pathlib import Path
from typing import Union
from typing import Union, Optional
from piper import PiperVoice
import wave
from user_settings import PIPER_VOICE, PIPER_VOICES_DIR, AUDIO_DIR
from user_settings import PIPER_VOICE_ID, PIPER_VOICES_DIR, AUDIO_DIR
from eleven_labs_api import ElevenLabsAPI

PATH_TO_JSON = Path(__file__).parent / "phonemized_words.json"
VOICE_FILE = Path(PIPER_VOICES_DIR) / f"{PIPER_VOICE}.onnx"
VOICE_FILE = Path(PIPER_VOICES_DIR) / f"{PIPER_VOICE_ID}.onnx"
VOICE = PiperVoice.load(VOICE_FILE)


class Generator:

def __init__(self, json_path: Union[Path, str]=PATH_TO_JSON):
with open(PATH_TO_JSON, 'r') as f:
def __init__(self, json_path: Union[Path, str]=PATH_TO_JSON, eleven_labs_api_token: Optional[str] = None):
with open(str(json_path), 'r') as f:
word_list = json.load(f)

self.givenPart1_map = word_list.get("givenPart1", {"Bene": "bene"})
self.givenPart2_map = word_list.get("givenPart2", {"dict": "dict"})
self.surnamePart1_map = word_list.get("surnamePart1", {"Cumber": "cumber"})
self.surnamePart2_map = word_list.get("surnamePart2", {"batch": "batch"})

if eleven_labs_api_token is not None:
self.eleven_labs_api = ElevenLabsAPI(eleven_labs_api_token)
else:
self.eleven_labs_api = None
return

def name(self):
def new_name(self):
first_part_1 = random.choice(list(self.givenPart1_map.keys()))
first_part_2 = random.choice(list(self.givenPart2_map.keys()))
last_part_1 = random.choice(list(self.surnamePart1_map.keys()))
Expand All @@ -37,20 +42,28 @@ def name(self):
phone = first_phone_part_1 + first_phone_part_2 + " " + last_phone_part_1 + last_phone_part_2
return first.capitalize() + " " + last.capitalize(), phone

@staticmethod
def vocalize(phone):
phone = f"[[ {phone} ]]"
wav_file = Path(AUDIO_DIR) / f"output.wav"
with wave.open(str(wav_file), 'wb') as output:
VOICE.synthesize_wav(phone, output)
def speak(self, name: str, phone: Optional[str] = None) -> Path:
cnt = 0 if self.eleven_labs_api is None else self.eleven_labs_api.remaining_character_count
if cnt < 20:
phone = f"[[ {phone} ]]" if phone else name
audio_file = Path(AUDIO_DIR) / f"output.wav"
with wave.open(str(audio_file), 'wb') as output:
VOICE.synthesize_wav(phone, output)
else:
audio_file = self.eleven_labs_api.get_spoken_name(name, AUDIO_DIR)

return audio_file

def get_remaining_eleven_labs_character_count(self):
return 0 if self.eleven_labs_api is None else self.eleven_labs_api.remaining_character_count


def main():
gen = Generator()
p = Path('~', 'Piper TTS', 'names.txt').expanduser()
with open(p, 'w') as f:
for _ in range(100):
f.write(gen.name()[0] + '.\n')
f.write(gen.new_name()[0] + '.\n')

if __name__ == "__main__":
main()
8 changes: 6 additions & 2 deletions src/user_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@
FFMPEG_EXEC = os.getenv("FFMPEG_EXEC", "ffmpeg")

# Piper
PIPER_VOICE = os.getenv("PIPER_VOICE", "en_GB-alan-medium")
_LEGACY_PIPER_VOICE = os.getenv("PIPER_VOICE", "en_GB-alan-medium")
PIPER_VOICE_ID = os.getenv("PIPER_VOICE_ID", _LEGACY_PIPER_VOICE)
PIPER_VOICES_DIR = os.getenv("PIPER_VOICES_DIR", "/voices")

# ElevenLabs
ELEVEN_LABS_VOICE_ID = os.getenv("ELEVEN_LABS_VOICE_ID", "zNsotODqUhvbJ5wMG7Ei") # Voice ID for "Charles"

# Discord
AUTO_VOICE_LEAVE_DELAY = os.getenv("AUTO_VOICE_LEAVE_DELAY", 20)
AUTO_VOICE_LEAVE_DELAY = int(os.getenv("AUTO_VOICE_LEAVE_DELAY", 20))

# Dev
DEBUG = os.getenv("DEBUG")