Text_to_Speech / app.py
hbui's picture
Update app.py
4cccb66 verified
from transformers import pipeline
import gradio as gr
# Checkpoint on the Hugging Face Model Hub that performs the synthesis.
model_name = "kakao-enterprise/vits-ljs"

# Build the pipeline once at import time; generate_speech() calls it for
# every request instead of constructing a new one.
text_to_speech_pipeline = pipeline(task="text-to-speech", model=model_name)
def generate_speech(text):
    """Synthesize speech for ``text`` in Gradio's numpy audio format.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        A ``(sampling_rate, waveform)`` tuple suitable for a
        ``gr.Audio(type="numpy")`` output, or ``None`` when ``text`` is
        empty or whitespace-only (Gradio then leaves the player empty).
    """
    # Nothing to synthesize: skip the model call rather than feed it an
    # empty prompt.
    if not text or not text.strip():
        return None

    # The text-to-speech pipeline returns a dict with "audio" (the waveform)
    # and "sampling_rate" keys -- not a list of {"array": ...} items.
    result = text_to_speech_pipeline(text)

    # The waveform may carry a leading singleton (batch/channel) axis;
    # squeeze it so Gradio receives a plain 1-D mono signal.
    waveform = result["audio"].squeeze()

    # Gradio expects (sample_rate, data) in that order. Use the rate the
    # pipeline reports instead of hard-coding one, so swapping the model
    # cannot silently change playback speed.
    return result["sampling_rate"], waveform
# UI components: a two-line textbox for the prompt and a numpy-typed audio
# player for the synthesized result.
text_input = gr.Textbox(lines=2, placeholder="Type something here...")
speech_output = gr.Audio(type="numpy", label="Generated Speech")

# Wire the components to generate_speech in a single-function Gradio app.
interface = gr.Interface(
    fn=generate_speech,
    inputs=text_input,
    outputs=speech_output,
    title="Text-to-Speech with Hugging Face",
    description="Enter text to generate speech using a model from Hugging Face's Model Hub.",
)

# Start serving the app.
interface.launch()