Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion eng_to_ipa/transcribe.py
Original file line number Diff line number Diff line change
def get_cmu(tokens_in, db_type="sql"):
    """Return the CMU pronunciations for each token, in input order.

    Args:
        tokens_in: sequence of word strings to look up.
        db_type: backend selector, passed through unchanged to fetch_words.

    Returns:
        A list with one entry per token. For a token found by fetch_words the
        entry is the value stored with that word (presumably a list of
        pronunciation strings -- confirm against fetch_words). For an unknown
        hyphenated token whose parts are all known, the entry is every
        space-joined combination of the parts' pronunciations. Otherwise the
        entry is ["__IGNORE__" + token].
    """
    result = fetch_words(tokens_in, db_type)

    # Index the rows once instead of rescanning them for every token; keeping
    # the first row per word gives the same answer as a first-match scan.
    found = {}
    for row in result:
        found.setdefault(row[0], row[1])

    ordered = []
    for word in tokens_in:
        if word in found:
            ordered.append(found[word])
            continue
        combined = _hyphenated_cmu(word, db_type)
        ordered.append(combined if combined else ["__IGNORE__" + word])
    return ordered


def _hyphenated_cmu(word, db_type):
    """Combine the pronunciations of a hyphenated word's parts, or return None."""
    if '-' not in word:
        return None

    # Look up every part, not just the first two, so that words with more
    # than one hyphen are transcribed in full instead of being truncated.
    per_part = get_cmu(word.split('-'), db_type=db_type)
    for prons in per_part:
        # An empty entry or an "__IGNORE__" marker means the part is unknown,
        # in which case the whole word is reported as unknown by the caller.
        if not prons or prons[0].startswith("__IGNORE__"):
            return None

    # Fold the parts left to right; the leftmost part varies slowest, which
    # matches the ordering produced by nested loops over the parts.
    combined = per_part[0]
    for prons in per_part[1:]:
        combined = [left + ' ' + right for left in combined for right in prons]
    return combined


Expand Down
17 changes: 17 additions & 0 deletions tests/test_transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-

# USAGE (run from the tests/ directory):
# PYTHONPATH=".." python -m unittest test_transcribe

from eng_to_ipa import transcribe
import transcribe_fixtures
import sys

# Token list handed to get_cmu by the test case below: one hyphenated word.
words0 = ["on-demand"]

class TestConversion_default(transcribe_fixtures.BaseConversion):
    """Run the BaseConversion checks against the "on-demand" fixture data."""

    @classmethod
    def setUpClass(cls):
        """Attach the input tokens and expected pronunciations to the class."""
        cls.lang = None  # not read by any code visible here
        # Expected get_cmu output for words0: one entry holding both
        # pronunciation variants of the hyphenated word.
        cls.cmu0 = [['aa1 n d ih0 m ae1 n d', 'ao1 n d ih0 m ae1 n d']]
        cls.words0 = words0
14 changes: 14 additions & 0 deletions tests/transcribe_fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-

# USAGE (run from the tests/ directory):
# PYTHONPATH=".." python -m unittest test_transcribe

from eng_to_ipa import transcribe
import unittest

class BaseConversion(unittest.TestCase):
    """Shared checks for transcribe.get_cmu.

    The test reads ``self.words0`` (input tokens) and ``self.cmu0`` (expected
    result); this class does not define them itself.
    """

    def test_get_cmu_on_demand(self):
        """get_cmu(words0) with the SQL backend must equal cmu0."""
        actual = transcribe.get_cmu(self.words0, db_type='sql')
        self.assertEqual(actual, self.cmu0)