File size: 2,919 Bytes
e4eb5c5
944dedf
 
 
 
b34a77f
19be65d
a82f51b
669226b
a82f51b
b34a77f
 
3e38fbb
669226b
 
4eb15f6
944dedf
f6a94c1
 
 
 
 
 
 
 
544fdea
 
f6a94c1
 
 
 
 
 
944dedf
669226b
 
 
b34a77f
 
 
 
 
 
 
 
 
 
f056f8e
 
 
 
 
 
 
 
1782e10
bca38c6
b34a77f
 
669226b
 
 
 
b34a77f
669226b
f056f8e
1782e10
ab3b67e
669226b
 
58a2f85
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import wave
import numpy as np
from io import BytesIO
from huggingface_hub import hf_hub_download
from piper import PiperVoice
from transformers import pipeline
import typing
import random

# Download the Piper voice model and its config from the Hugging Face Hub,
# then load the voice once at import time so every request reuses it.
model_path = hf_hub_download(repo_id="larcanio/piper-voices", filename="es_AR-daniela-high.onnx")
config_path = hf_hub_download(repo_id="larcanio/piper-voices", filename="es_AR-daniela-high.json")
voice = PiperVoice.load(model_path, config_path)

# Load the pool of random Spanish quotes. The explicit UTF-8 encoding matters:
# the file contains accented Spanish text, and the platform-default encoding
# (e.g. cp1252 on Windows) would mangle or reject it. Blank lines are skipped
# so the random picker never returns an empty quote.
with open('assets/sentences_es.txt', 'r', encoding='utf-8') as r:
    random_quotes = [line.strip() for line in r if line.strip()]

def synthesize_speech(text):
    """Synthesize `text` with the preloaded Piper voice.

    Returns a ``((sample_rate, samples), None)`` pair: the first element is
    what a Gradio ``Audio(type="numpy")`` output component expects, the
    second feeds the hidden "Tokens generados" textbox.
    """
    # Render the synthesized speech into an in-memory WAV container.
    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit PCM
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file)

    # Re-open the buffer as a WAV reader so only the PCM frames are decoded.
    # (The previous code ran np.frombuffer over the whole buffer, which
    # interpreted the 44-byte WAV header as int16 samples — an audible
    # click/garbage at the start of every clip.)
    buffer.seek(0)
    with wave.open(buffer, 'rb') as wav_read:
        frames = wav_read.readframes(wav_read.getnframes())
    audio_data = np.frombuffer(frames, dtype=np.int16)

    # gr.Audio(type="numpy") expects a (sample_rate, ndarray) tuple, not raw
    # bytes, so return the tuple rather than audio_data.tobytes().
    return (voice.config.sample_rate, audio_data), None

def get_random_quote():
    """Return one of the pre-loaded Spanish quotes, chosen uniformly at random."""
    index = random.randrange(len(random_quotes))
    return random_quotes[index]

# Markdown banner shown above the demo controls (user-facing, Spanish).
# Typos fixed: "modelo de abierto" -> "modelo abierto",
# "Raspberri Pi" -> "Raspberry Pi".
BANNER_TEXT = """
# Demo en español argentino con Piper

[***Piper***](https://huggingface.co/rhasspy/piper-voices/) es un modelo abierto de Texto a Voz (TTS)
que permite entrenarse con voz propia, destaca por no requerir conectarse a Internet y ofrecer resultados
sin exigir GPU.  Inicialmente diseñado para Raspberry Pi.

Este demo solo muestra español, puedes probar [voces en otros idiomas](https://rhasspy.github.io/piper-samples/).
"""

# Markdown credits footer rendered below the demo controls.
# NOTE(review): the heading is English ("Credits") while the rest of the UI
# is Spanish — confirm whether that is intentional before changing it.
FOOTER_TEXT = """
# Credits

[voice trained](https://huggingface.co/larcanio/piper-voices) by [larcanio](https://huggingface.co/larcanio/),
[original demo](https://huggingface.co/gyroing/Persian-Piper-Model-gyro) by [gyroing](https://huggingface.co/gyroing/)
on [piper's shoulders](https://huggingface.co/rhasspy/piper-voices) by [rhasspy](https://github.com/rhasspy). [More info](https://huggingface.co/spaces/igortamara/sample-tts-piper/blob/main/README.md)
"""

# Build the Gradio UI with Blocks: banner, text input, two action buttons,
# an auto-playing audio output, a hidden textbox (sink for the second return
# value of synthesize_speech), and the credits footer.
with gr.Blocks(theme=gr.themes.Base(), title="Piper Argentinian voice test") as demo:
    gr.Markdown(BANNER_TEXT)
    input_text = gr.Textbox(label=" ", placeholder="Introduce el texto a leer aquí")
    with gr.Row():
        submit_button = gr.Button("Genera audio")
        random_btn = gr.Button('🎲 Cita aleatoria 💬', variant='secondary')
    output_audio = gr.Audio(label="Audio generado", type="numpy", interactive=False, streaming=False, autoplay=True)
    # Hidden component: receives the (always-None) second output of synthesize_speech.
    output_text = gr.Textbox(label="Tokens generados", visible=False)

    gr.Markdown(FOOTER_TEXT)

    # Wire the buttons: generate audio from the typed text, or drop a random
    # quote into the input box.
    submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
    random_btn.click(fn=get_random_quote, inputs=[], outputs=[input_text])


# Start the Gradio server only when executed as a script (not on import).
if __name__ == '__main__':
    demo.launch()