import gradio as gr
import wave
import numpy as np
from io import BytesIO
from huggingface_hub import hf_hub_download
from piper import PiperVoice
import random

# Download the Argentinian Spanish voice (ONNX model + config) and load it with Piper
model_path = hf_hub_download(repo_id="larcanio/piper-voices", filename="es_AR-daniela-high.onnx")
config_path = hf_hub_download(repo_id="larcanio/piper-voices", filename="es_AR-daniela-high.json")
voice = PiperVoice.load(model_path, config_path)

# Sentences offered by the random-quote button
with open('assets/sentences_es.txt', 'r', encoding='utf-8') as r:
    random_quotes = [line.strip() for line in r]

def synthesize_speech(text):
    # Render the text to an in-memory WAV file
    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file)

    # Read the frames back (skipping the WAV header) as a NumPy array
    buffer.seek(0)
    with wave.open(buffer, 'rb') as wav_reader:
        audio_data = np.frombuffer(wav_reader.readframes(wav_reader.getnframes()), dtype=np.int16)

    # gr.Audio(type="numpy") expects a (sample_rate, samples) tuple; the second
    # return value feeds the hidden "Tokens generados" textbox
    return (voice.config.sample_rate, audio_data), None
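
# A minimal sketch (not wired into the demo) of writing Piper output straight to a
# file on disk with the same voice; the function name and the "sample.wav" default
# are illustrative, not part of the original app.
def synthesize_to_file(text, path="sample.wav"):
    with wave.open(path, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file)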

def get_random_quote():
    # Fills the input textbox when the random-quote button is pressed
    return random.choice(random_quotes)
BANNER_TEXT = """
# Demo en español argentino con Piper
[***Piper***](https://huggingface.co/rhasspy/piper-voices/) es un modelo de abierto de Texto a Voz (TTS)
que permite entrenarse con voz propia, destaca por no requerir conectarse a Internet y ofrecer resultados
sin exigir GPU. Inicialmente diseñado para Raspberri Pi.
Este demo solo muestra español, puedes probar [voces en otros idiomas](https://rhasspy.github.io/piper-samples/).
"""
FOOTER_TEXT = """
# Credits
[voice trained](https://huggingface.co/larcanio/piper-voices) by [larcanio](https://huggingface.co/larcanio/),
[original demo](https://huggingface.co/gyroing/Persian-Piper-Model-gyro) by [gyroing](https://huggingface.co/gyroing/)
on [piper's shoulders](https://huggingface.co/rhasspy/piper-voices) by [rhasspy](https://github.com/rhasspy). [More info](https://huggingface.co/spaces/igortamara/sample-tts-piper/blob/main/README.md)
"""

# Using Gradio Blocks
with gr.Blocks(theme=gr.themes.Base(), title="Piper Argentinian voice test") as demo:
    gr.Markdown(BANNER_TEXT)
    input_text = gr.Textbox(label=" ", placeholder="Introduce el texto a leer aquí")
    with gr.Row():
        submit_button = gr.Button("Genera audio")
        random_btn = gr.Button('🎲 Cita aleatoria 💬', variant='secondary')
    output_audio = gr.Audio(label="Audio generado", type="numpy", interactive=False, streaming=False, autoplay=True)
    output_text = gr.Textbox(label="Tokens generados", visible=False)
    gr.Markdown(FOOTER_TEXT)
    submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
    random_btn.click(fn=get_random_quote, inputs=[], outputs=[input_text])

if __name__ == '__main__':
    demo.launch()
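
# Hypothetical quick check, kept commented out so it never runs on the Space:
# it would confirm that synthesize_speech returns the (sample_rate, samples)
# tuple expected by gr.Audio(type="numpy").
# (sr, samples), _ = synthesize_speech("Hola, esto es una prueba.")
# print(sr, samples.dtype, samples.shape)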