# Spaces: Sleeping (page status captured along with the source; not part of the program)
import gradio as gr | |
import wave | |
import numpy as np | |
from io import BytesIO | |
from huggingface_hub import hf_hub_download | |
from piper import PiperVoice | |
from transformers import pipeline | |
import typing | |
import random | |
# Download the voice model and its config from the Hugging Face Hub, then
# load them into the module-level Piper voice used by synthesize_speech.
_VOICE_REPO_ID = "larcanio/piper-voices"
model_path, config_path = (
    hf_hub_download(repo_id=_VOICE_REPO_ID, filename=artifact)
    for artifact in ("es_AR-daniela-high.onnx", "es_AR-daniela-high.json")
)
voice = PiperVoice.load(model_path, config_path)
# Candidate sentences for the "random quote" button, one per line.
# The encoding is stated explicitly so that accented characters do not depend
# on the platform's default locale (assumes the file is UTF-8 -- TODO confirm).
# Blank lines are dropped so they can never be offered as a quote.
with open('assets/sentences_es.txt', 'r', encoding='utf-8') as r:
    random_quotes = [quote for quote in map(str.strip, r) if quote]
def synthesize_speech(text):
    """Synthesize *text* with the module-level Piper ``voice``.

    Args:
        text: Text to read aloud.

    Returns:
        An ``(audio, tokens)`` pair, one value per Gradio output wired to
        this handler. ``audio`` is the complete in-memory WAV file (header
        included) as ``bytes``, or ``None`` when *text* is empty or only
        whitespace. ``tokens`` is always ``None``.
    """
    # Nothing to say: skip synthesis instead of emitting a header-only WAV.
    if not text or not text.strip():
        return None, None

    # Create an in-memory buffer for the WAV file
    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setnchannels(1)  # mono
        # Synthesize speech straight into the WAV writer.
        voice.synthesize(text, wav_file)

    # Hand back the WAV container untouched. The previous int16 round trip
    # through NumPy produced exactly these bytes, so it is dropped.
    return buffer.getvalue(), None
def get_random_quote():
    """Return one sentence chosen at random from ``random_quotes``.

    Returns:
        A randomly selected sentence, or an empty string when no sentences
        were loaded (``random.choice`` would raise ``IndexError`` on an
        empty list).
    """
    if not random_quotes:
        return ""
    return random.choice(random_quotes)
# Markdown shown at the top of the page (user-facing, in Spanish).
BANNER_TEXT = """
# Demo en español argentino con Piper
[***Piper***](https://huggingface.co/rhasspy/piper-voices/) es un modelo abierto de Texto a Voz (TTS)
que permite entrenarse con voz propia, destaca por no requerir conectarse a Internet y ofrecer resultados
sin exigir GPU. Inicialmente diseñado para Raspberry Pi.
Este demo solo muestra español, puedes probar [voces en otros idiomas](https://rhasspy.github.io/piper-samples/).
"""
FOOTER_TEXT = """ | |
# Credits | |
[voice trained](https://huggingface.co/larcanio/piper-voices) by [larcanio](https://huggingface.co/larcanio/), | |
[original demo](https://huggingface.co/gyroing/Persian-Piper-Model-gyro) by [gyroing](https://huggingface.co/gyroing/) | |
on [piper's shoulders](https://huggingface.co/rhasspy/piper-voices) by [rhasspy](https://github.com/rhasspy). [More info](https://huggingface.co/spaces/igortamara/sample-tts-piper/blob/main/README.md) | |
""" | |
# Page layout built with Gradio Blocks: banner, text input, action buttons,
# audio player and credits, followed by the event wiring.
# NOTE(review): the original indentation was lost; the nesting below (both
# buttons in one Row, handlers registered inside the Blocks context) is the
# presumed structure -- confirm against the deployed Space.
with gr.Blocks(title="Piper Argentinian voice test", theme=gr.themes.Base()) as demo:
    gr.Markdown(BANNER_TEXT)
    input_text = gr.Textbox(
        label=" ",
        placeholder="Introduce el texto a leer aquí",
    )

    with gr.Row():
        submit_button = gr.Button("Genera audio")
        random_btn = gr.Button("🎲 Cita aleatoria 💬", variant="secondary")

    output_audio = gr.Audio(
        label="Audio generado",
        type="numpy",
        interactive=False,
        streaming=False,
        autoplay=True,
    )
    # Hidden component that receives the second value returned by
    # synthesize_speech.
    output_text = gr.Textbox(label="Tokens generados", visible=False)
    gr.Markdown(FOOTER_TEXT)

    # "Genera audio" synthesizes the typed text; the dice button fills the
    # text box with a random sentence.
    submit_button.click(
        fn=synthesize_speech,
        inputs=input_text,
        outputs=[output_audio, output_text],
    )
    random_btn.click(fn=get_random_quote, inputs=[], outputs=[input_text])

if __name__ == "__main__":
    demo.launch()