TTS / app.py
yaara1's picture
Update app.py
c6068ea verified
import re
import pandas as pd
from phonikud_onnx import Phonikud
from phonikud import phonemize
from piper_onnx import Piper
import soundfile as sf
import gradio as gr
# Diacritizer model, loaded once at import time from a local ONNX file.
# Used below via `phonikud_onnx.add_diacritics(...)` before phonemization.
# NOTE: this name shadows the `phonikud_onnx` package the class was imported from.
phonikud_onnx = Phonikud("phonikud-1.0.int8.onnx")
# Speech synthesizer, loaded once at import time from a local ONNX model and
# its JSON config. Used below via `piper.create(..., is_phonemes=True)`.
piper = Piper('tts-model.onnx', 'tts-model.config.json')
# Spelling-out table for acronyms that have no dictionary entry: each Hebrew
# letter maps to the phoneme string emitted for it by `handle_acronym`.
LETTER_TO_PHONEME = {
    # Regular letters: consonant followed by the vowel 'a'.
    'א': 'ʔa',
    'ב': 'ba',
    'ג': 'ɡa',
    'ד': 'da',
    'ה': 'ha',
    'ו': 'va',
    'ז': 'za',
    'ח': 'χa',
    'ט': 'ta',
    'י': 'ja',
    'כ': 'ka',
    'ל': 'la',
    'מ': 'ma',
    'נ': 'na',
    'ס': 'sa',
    'ע': 'ʔa',
    'פ': 'pa',
    'צ': 'tsa',
    'ק': 'ka',
    'ר': 'ʁa',
    'ש': 'ʃa',
    'ת': 'ta',
    # Final (sofit) letter forms: bare consonant, no vowel.
    'ם': 'm',
    'ן': 'n',
    'ף': 'f',
    'ך': 'χ',
    'ץ': 'ts',
}
def split_text(text):
    """Tokenize ``text`` on whitespace and tag every token.

    Returns a list of ``(kind, chunk)`` tuples in input order, where ``kind``
    is one of:

    * ``"in_dict"`` - the token, with the characters ``״":,.!?`` removed, is a
      key of ``acronym_dict``; ``chunk`` is that cleaned key.
    * ``"acronym"`` - the token has a ``"`` or ``״`` between word characters;
      ``chunk`` is the token unchanged (punctuation included).
    * ``"text"`` - anything else; ``chunk`` is the token unchanged.
    """
    classified = []
    for raw in text.split():
        # Dictionary keys are looked up without quote marks / punctuation.
        bare = re.sub(r'[״":,.!?]', '', raw)
        if bare in acronym_dict:
            classified.append(("in_dict", bare))
            continue
        if re.search(r'\w+["״]\w+', raw):
            classified.append(("acronym", raw))
            continue
        if raw.strip():
            classified.append(("text", raw))
    return classified
def handle_acronym(acronym):
    """Spell an acronym out letter by letter as a phoneme string.

    The characters ``״":,.!?`` are removed first. Every remaining character is
    looked up in ``LETTER_TO_PHONEME`` (characters without an entry are passed
    through unchanged). The last character's phoneme loses its trailing ``a``
    so the acronym ends on a consonant.

    Args:
        acronym: The raw token, possibly containing quote marks/punctuation.

    Returns:
        The concatenated phoneme string, or ``''`` when nothing is left after
        stripping punctuation.
    """
    acronym = re.sub(r'[״":,.!?]', '', acronym)  # Remove quotes here
    if not acronym:
        # Nothing to spell; indexing acronym[-1] below would raise IndexError.
        return ''

    # Convert all letters except the last one
    phonemes = ''.join(
        LETTER_TO_PHONEME.get(letter, letter) for letter in acronym[:-1]
    )

    # Convert the last letter and drop only a *trailing* 'a' (not every 'a').
    last_phoneme = LETTER_TO_PHONEME.get(acronym[-1], acronym[-1])
    if last_phoneme.endswith('a'):
        last_phoneme = last_phoneme[:-1]
    phonemes += last_phoneme

    print(f"Acronym: {acronym} → Phonemes: {phonemes}")  # Optional debug
    return phonemes
def convert_txt_to_phonemes(text):
    """Convert Hebrew ``text`` into one space-separated phoneme string.

    The text is tagged by ``split_text`` and each chunk is rendered by kind:
    dictionary acronyms use their stored phonemes, other acronyms are spelled
    out by ``handle_acronym``, and everything else is diacritized and then
    phonemized. The result is printed (debug output) and returned.

    NOTE: every plain-text token is diacritized on its own, one token per
    ``add_diacritics`` call.
    """
    def render(kind, chunk):
        # Produce the phoneme string for a single tagged chunk.
        if kind == "in_dict":
            print(f"Found! {chunk}{acronym_dict[chunk]}")
            return acronym_dict[chunk]
        if kind == "acronym":
            return handle_acronym(chunk)
        return phonemize(phonikud_onnx.add_diacritics(chunk))

    phonemes = ' '.join(
        [render(kind, chunk) for kind, chunk in split_text(text)]
    )
    print(phonemes)
    return phonemes
def heb_to_speech(text, temp_word=" רות", temp_duration=0.36):
    """Synthesize Hebrew ``text`` to speech and return the WAV file path.

    A throw-away word is appended to the text before synthesis and the matching
    stretch of audio is cut from the end afterwards (presumably to keep the
    model from clipping the real final word - the reason is not stated here).

    Args:
        text: Hebrew text to speak.
        temp_word: Text appended before synthesis and trimmed from the audio.
        temp_duration: Length of audio to cut from the end, in seconds
            (multiplied by the sample rate to get a sample count).

    Returns:
        ``"final_audio.wav"``, the path of the trimmed audio file. The
        untrimmed audio is also written to ``"raw_audio.wav"``.

    NOTE(review): both output paths are fixed, so overlapping calls overwrite
    each other's files.
    """
    # Step 1: Add temporary word
    text_with_temp = text + temp_word
    phonemes = convert_txt_to_phonemes(text_with_temp)

    # Step 2: Generate audio
    samples, sample_rate = piper.create(phonemes, is_phonemes=True)
    sf.write("raw_audio.wav", samples, sample_rate)

    # Step 3: Trim temporary word
    # Slice by an explicit end index: `samples[:-0]` is `samples[:0]`, which
    # would return *empty* audio when the trim rounds down to zero samples.
    trim_samples = max(0, int(temp_duration * sample_rate))
    keep = max(0, len(samples) - trim_samples)
    trimmed_samples = samples[:keep]
    sf.write("final_audio.wav", trimmed_samples, sample_rate)
    return "final_audio.wav"
# Acronym pronunciation overrides, loaded once at import time from a local
# spreadsheet with an 'acronym' column (lookup key) and a 'phonemes' column.
# `split_text` strips the characters ״":,.!? from a token before looking it up,
# so keys only match when they are stored without those characters.
acronym_df = pd.read_excel("acronym-phonemes-dict.xlsx")
acronym_dict = {
    entry['acronym']: entry['phonemes']
    for _, entry in acronym_df.iterrows()
}
# Gradio UI: a text box, a button, and an audio player. Clicking the button
# passes the text box contents to `heb_to_speech` and plays the returned file.
with gr.Blocks() as demo:
    text_input = gr.Textbox(
        label="Insert Hebrew text",
        lines=2,
    )
    generate_btn = gr.Button("Generate")
    audio_output = gr.Audio(
        label="🔊",
        type="filepath",  # heb_to_speech returns a path, not raw samples
        interactive=False,
    )
    generate_btn.click(
        fn=heb_to_speech,
        inputs=text_input,
        outputs=audio_output,
    )

# Start the web server; share=True additionally requests a public share link.
demo.launch(share=True)