"""Gradio demo that turns typed text into speech with a Hugging Face model."""

from transformers import pipeline
import gradio as gr

# Initialize the text-to-speech pipeline with a model from Hugging Face's
# Model Hub. Built once at import time so every request reuses the same
# loaded model.
model_name = "kakao-enterprise/vits-ljs"
text_to_speech_pipeline = pipeline("text-to-speech", model=model_name)


def generate_speech(text):
    """Synthesize speech for *text* and return it in Gradio's numpy format.

    Args:
        text: The sentence(s) typed into the textbox.

    Returns:
        A ``(sampling_rate, audio)`` tuple, which is the order
        ``gr.Audio(type="numpy")`` expects from a function.

    Raises:
        gr.Error: If *text* is empty or only whitespace, so the user sees a
            message in the UI instead of a model failure.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # For a single string the pipeline returns one dict with the keys
    # "audio" and "sampling_rate" (not a list of {"array": ...} entries).
    out = text_to_speech_pipeline(text)

    # The pipeline documents "audio" as (nb_channels, audio_length), while
    # Gradio wants (samples,) or (samples, channels): transpose, then drop
    # the channel axis when the output is mono.
    audio_data = out["audio"].T.squeeze()

    # Use the rate reported by the model rather than a hard-coded constant.
    return out["sampling_rate"], audio_data


# Create the Gradio interface
interface = gr.Interface(
    fn=generate_speech,
    inputs=gr.Textbox(lines=2, placeholder="Type something here..."),
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="Text-to-Speech with Hugging Face",
    description="Enter text to generate speech using a model from Hugging Face's Model Hub.",
)

# Launch the app
interface.launch()