|
import re |
|
import pandas as pd |
|
from phonikud_onnx import Phonikud |
|
from phonikud import phonemize |
|
from piper_onnx import Piper |
|
import soundfile as sf |
|
import gradio as gr |
|
|
|
|
|
# File names of the ONNX assets loaded once at import time.
PHONIKUD_MODEL_PATH = "phonikud-1.0.int8.onnx"
PIPER_MODEL_PATH = 'tts-model.onnx'
PIPER_CONFIG_PATH = 'tts-model.config.json'

# Diacritizer instance; convert_txt_to_phonemes calls its add_diacritics().
phonikud_onnx = Phonikud(PHONIKUD_MODEL_PATH)

# TTS instance; heb_to_speech calls its create() with phoneme input.
piper = Piper(PIPER_MODEL_PATH, PIPER_CONFIG_PATH)
|
|
|
|
|
# Consonant sound of each regular Hebrew letter. The spelled-out form stored
# in LETTER_TO_PHONEME is that consonant followed by the vowel "a".
_LETTER_CONSONANTS = {
    'א': 'ʔ',
    'ב': 'b',
    'ג': 'ɡ',
    'ד': 'd',
    'ה': 'h',
    'ו': 'v',
    'ז': 'z',
    'ח': 'χ',
    'ט': 't',
    'י': 'j',
    'כ': 'k',
    'ל': 'l',
    'מ': 'm',
    'נ': 'n',
    'ס': 's',
    'ע': 'ʔ',
    'פ': 'p',
    'צ': 'ts',
    'ק': 'k',
    'ר': 'ʁ',
    'ש': 'ʃ',
    'ת': 't',
}

# Final-form letters map to a bare consonant, with no vowel appended.
_FINAL_LETTERS = {
    'ם': 'm',
    'ן': 'n',
    'ף': 'f',
    'ך': 'χ',
    'ץ': 'ts',
}

# Per-letter phonemes used by handle_acronym to spell out acronyms that are
# not listed in the acronym dictionary.
LETTER_TO_PHONEME = {
    **{letter: consonant + 'a' for letter, consonant in _LETTER_CONSONANTS.items()},
    **_FINAL_LETTERS,
}
|
|
|
|
|
# Punctuation removed from a token before it is looked up in acronym_dict.
_TOKEN_PUNCTUATION_RE = re.compile(r'[״":,.!?]')

# A gershayim (״) or ASCII double quote between word characters marks a
# token as an acronym.
_ACRONYM_TOKEN_RE = re.compile(r'\w+["״]\w+')


def split_text(text):
    """Split *text* on whitespace and classify every token.

    Each token becomes one ``(kind, chunk)`` tuple, checked in this order:

    * ``("in_dict", bare)`` - the token with punctuation removed is a key of
      the module-level ``acronym_dict``; ``bare`` is that stripped form.
    * ``("acronym", token)`` - the token contains a double quote or gershayim
      between word characters; the token is returned unmodified.
    * ``("text", token)`` - everything else, returned unmodified.

    Returns the tuples as a list in input order; empty or whitespace-only
    text yields an empty list.
    """
    words = []
    for token in text.split():
        # Strip punctuation once and reuse it for both the lookup and the
        # stored chunk.
        bare = _TOKEN_PUNCTUATION_RE.sub('', token)
        if bare in acronym_dict:
            words.append(("in_dict", bare))
        elif _ACRONYM_TOKEN_RE.search(token):
            words.append(("acronym", token))
        else:
            # str.split() never yields empty tokens, so no emptiness check
            # is needed here.
            words.append(("text", token))
    return words
|
|
|
|
|
def handle_acronym(acronym):
    """Spell out *acronym* letter by letter and return the phoneme string.

    Punctuation (quotes, gershayim, ``: , . ! ?``) is removed first. Every
    character except the last is replaced by its ``LETTER_TO_PHONEME`` entry.
    The last character uses its entry with the vowel "a" removed, so the
    acronym ends on a consonant. Characters missing from the table pass
    through unchanged, including in the last position.

    Returns an empty string when nothing is left after removing punctuation.
    The resulting mapping is also printed for debugging.
    """
    acronym = re.sub(r'[״":,.!?]', '', acronym)
    if not acronym:
        # Indexing the last character of an empty string would raise.
        return ''

    *leading, final = acronym
    phonemes = ''.join(LETTER_TO_PHONEME.get(letter, letter) for letter in leading)

    last_phoneme = LETTER_TO_PHONEME.get(final)
    if last_phoneme is None:
        # Unmapped character (digit, Latin letter, ...): keep it verbatim.
        # Removing "a" here would silently delete a literal trailing "a".
        last_phoneme = final
    else:
        last_phoneme = last_phoneme.replace('a', '')
    phonemes += last_phoneme

    print(f"Acronym: {acronym} → Phonemes: {phonemes}")
    return phonemes
|
|
|
def convert_txt_to_phonemes(text):
    """Turn *text* into one space-separated phoneme string.

    The text is tokenized by ``split_text`` and each token is converted
    according to its kind: dictionary acronyms use their ``acronym_dict``
    entry, other acronyms are spelled out by ``handle_acronym``, and plain
    tokens are diacritized by ``phonikud_onnx`` and then phonemized.
    The joined result is printed and returned.
    """

    def to_phonemes(kind, chunk):
        # Convert one classified token to its phoneme string.
        if kind == "acronym":
            return handle_acronym(chunk)
        if kind != "in_dict":
            return phonemize(phonikud_onnx.add_diacritics(chunk))
        print(f"Found! {chunk} → {acronym_dict[chunk]}")
        return acronym_dict[chunk]

    phonemes = ' '.join(to_phonemes(kind, chunk) for kind, chunk in split_text(text))
    print(phonemes)
    return phonemes
|
|
|
def _drop_tail(samples, count):
    """Return *samples* without its last *count* items (count <= 0 drops none)."""
    keep = max(len(samples) - max(count, 0), 0)
    return samples[:keep]


def heb_to_speech(text, temp_word=" רות", temp_duration=0.36):
    """Synthesize *text* to a WAV file and return the file's path.

    ``temp_word`` is appended to the text before synthesis, and the last
    ``temp_duration`` seconds of the generated audio are cut off again.
    NOTE(review): presumably the filler word keeps the model from clipping
    the end of the real text - confirm with the author.

    Args:
        text: Hebrew text to speak.
        temp_word: Filler appended to the text before phonemization.
        temp_duration: Seconds of audio removed from the end of the result.

    Returns:
        ``"final_audio.wav"`` after writing it, or ``None`` when *text* is
        empty or whitespace-only (nothing is synthesized or written).

    Side effects:
        Writes ``raw_audio.wav`` (untrimmed) and ``final_audio.wav``
        (trimmed) to the current working directory.
    """
    if not text or not text.strip():
        # Without real input only the filler word would be synthesized.
        return None

    phonemes = convert_txt_to_phonemes(text + temp_word)

    samples, sample_rate = piper.create(phonemes, is_phonemes=True)
    sf.write("raw_audio.wav", samples, sample_rate)

    trim_samples = int(temp_duration * sample_rate)
    # A plain samples[:-trim_samples] returns an empty result when
    # trim_samples is 0, so the cut position is computed explicitly.
    trimmed_samples = _drop_tail(samples, trim_samples)
    sf.write("final_audio.wav", trimmed_samples, sample_rate)
    return "final_audio.wav"
|
|
|
|
|
# Acronym -> phoneme overrides, read from the "acronym" and "phonemes"
# columns of the spreadsheet. split_text and convert_txt_to_phonemes look
# tokens up in acronym_dict.
acronym_df = pd.read_excel("acronym-phonemes-dict.xlsx")

# Rows with an empty cell are skipped. A missing acronym can never match a
# token, and a missing phoneme value (NaN) would break the string join in
# convert_txt_to_phonemes.
_complete_acronym_rows = acronym_df.dropna(subset=['acronym', 'phonemes'])
acronym_dict = dict(
    zip(_complete_acronym_rows['acronym'], _complete_acronym_rows['phonemes'])
)
|
|
|
# Gradio page: a text box, a button, and an audio player fed by the file
# path that heb_to_speech returns.
with gr.Blocks() as demo:
    text_input = gr.Textbox(label="Insert Hebrew text", lines=2)
    generate_btn = gr.Button("Generate")
    audio_output = gr.Audio(label="🔊", type="filepath", interactive=False)

    # Event wiring has to happen inside the Blocks context.
    generate_btn.click(fn=heb_to_speech, inputs=text_input, outputs=audio_output)

# Launch only when executed as a script, so importing this module (tests,
# reload tooling) does not start a publicly shared server as a side effect.
if __name__ == "__main__":
    demo.launch(share=True)