"""Gradio demo for the Latvian "Aivars" Piper text-to-speech voice."""

import wave
from functools import lru_cache
from io import BytesIO

import gradio
import numpy as np  # noqa: F401 - retained from the original import list
from huggingface_hub import hf_hub_download
from piper import PiperVoice

_MODEL_REPO_ID = "RaivisDejus/Piper-lv_LV-Aivars-medium"
_MODEL_FILENAME = "lv_LV-aivars-medium.onnx"
_CONFIG_FILENAME = "lv_LV-aivars-medium.onnx.json"

# Page description shown above the controls. Kept at column 0 so the Markdown
# structure (heading, paragraphs, list) does not depend on any dedenting.
_DESCRIPTION_MARKDOWN = """
# Latvian Piper TTS

Test Latvian Piper TTS [model](https://huggingface.co/RaivisDejus/Piper-lv_LV-Aivars-medium) Aivars.

Available parameters:

- Sentence silence - The `--sentence-silence` piper parameter controls silence duration between sentences.
- Length scale - The `--length-scale` piper parameter controls length and speech speed of generated audio.

Support development of Latvian speech technologies and contribute to the [Balsu talka](https://balsutalka.lv/).

If you notice strange pronunciation of some words, please report them [here](https://huggingface.co/RaivisDejus/Piper-lv_LV-Aivars-medium/discussions/2)
"""

# Default text for the input box: two paragraphs separated by a newline.
# NOTE(review): the separator is assumed to be "\n" - confirm against the
# original file if the exact character matters.
_DEFAULT_TEXT = (
    "“Balsu talka” ir projekts latviešu valodas nākotnei. Tā mērķis ir iemūžināt mūsdienās runāto latviešu valodu un nodrošināt latviešu valodas pieejamību tehnoloģiju attīstībai. \n"
    "Šo mērķi varam sasniegt tikai kopīgiem spēkiem, tāpēc aicinām katru ierunāt vismaz piecus teikumus, lai savāktu pēc iespējas vairāk runas paraugu un izveidotu daudzveidīgu, atvērtu un ikvienam pieejamu latviešu runas datu kopu."
)


@lru_cache(maxsize=1)
def _load_voice() -> PiperVoice:
    """Fetch the model and its config from the Hub and load the voice once.

    The result is cached so repeated synthesis requests reuse the same
    ``PiperVoice`` instead of re-resolving the files and reloading the model
    on every call.
    """
    model_path = hf_hub_download(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)
    config_path = hf_hub_download(repo_id=_MODEL_REPO_ID, filename=_CONFIG_FILENAME)
    return PiperVoice.load(model_path, config_path)


def synthesize_speech(text: str, sentence_silence: float, length_scale: float) -> bytes:
    """Synthesize ``text`` with the Aivars voice and return it as WAV bytes.

    Args:
        text: Text to speak.
        sentence_silence: Forwarded to ``PiperVoice.synthesize``; corresponds
            to Piper's ``--sentence-silence`` option (silence between
            sentences).
        length_scale: Forwarded to ``PiperVoice.synthesize``; corresponds to
            Piper's ``--length-scale`` option (length / speaking speed).

    Returns:
        The bytes of a complete WAV file (header included): mono, 16-bit
        samples, at the sample rate given by the voice configuration.
    """
    voice = _load_voice()

    buffer = BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setnchannels(1)  # mono
        voice.synthesize(
            text,
            wav_file,
            sentence_silence=sentence_silence,
            length_scale=length_scale,
        )

    # The WAV header is finalized when the writer closes above; the buffer now
    # holds the whole file, which is returned unchanged.
    return buffer.getvalue()


# Gradio Interface
with gradio.Blocks(theme=gradio.themes.Base()) as demo:
    gradio.Markdown(_DESCRIPTION_MARKDOWN)

    input_text = gradio.Textbox(
        label="Text to generate",
        lines=3,
        value=_DEFAULT_TEXT,
    )
    sentence_silence = gradio.Slider(
        label="Sentence silence",
        minimum=0.0,
        maximum=2.0,
        step=0.05,
        value=0.1,
    )
    length_scale = gradio.Slider(
        label="Length scale",
        minimum=0.5,
        maximum=2.0,
        step=0.05,
        value=1.0,
    )
    submit_button = gradio.Button("Generate")
    output_audio = gradio.Audio(
        label="Generated speech",
        autoplay=False,
        type="numpy",
        show_download_button=True,
        show_share_button=False,
    )

    submit_button.click(
        synthesize_speech,
        inputs=[input_text, sentence_silence, length_scale],
        outputs=[output_audio],
    )

# Launched at module level, as in the original script.
demo.launch()