ejazhabibdar's picture
Create app.py
9897ed3
raw
history blame contribute delete
No virus
1.29 kB
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
import torch
import gradio as gr
# Pick the compute device first: GPU when CUDA is available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fine-tuned SpeechT5 checkpoint; the model and its processor are both
# loaded from the same repository.
model_checkpoint = "ejazhabibdar/speecht5_finetuned_voxpopuli_nl"
processor = SpeechT5Processor.from_pretrained(model_checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(model_checkpoint)

# HiFi-GAN vocoder used by generate_speech to produce the final audio.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Move both networks to the selected device and switch them to
# evaluation mode, since this script only runs inference.
for _net in (model, vocoder):
    _net.to(device)
    _net.eval()
# Define the TTS function
def text_to_speech(text):
    """Synthesize speech for *text* and return it in Gradio's audio format.

    Args:
        text: The sentence(s) to speak, as entered in the Gradio text box.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a NumPy
        array, which is the in-memory form Gradio's ``"audio"`` output
        accepts. Returns ``None`` (no audio) when *text* is empty or only
        whitespace.
    """
    # Nothing to synthesize: let Gradio show an empty audio component
    # instead of running the model on an empty prompt.
    if not text or not text.strip():
        return None

    # Preprocess the input text; only the token ids are needed on the device.
    encoded = processor(text=text, return_tensors="pt")
    input_ids = encoded["input_ids"].to(device)

    # Generate speech without tracking gradients (inference only).
    # NOTE(review): no speaker embeddings are passed to generate_speech;
    # confirm the fine-tuned checkpoint is intended to be used without them.
    with torch.no_grad():
        speech = model.generate_speech(input_ids, vocoder=vocoder)

    # The SpeechT5 HiFi-GAN vocoder is documented as producing 16 kHz audio.
    sample_rate = 16000

    # Gradio's "audio" output needs (sample_rate, numpy array), not a plain
    # Python list; the tensor must be moved to the CPU before conversion.
    return sample_rate, speech.cpu().numpy()
# Options for the Gradio UI: one text input wired to text_to_speech,
# with the result rendered by an audio output component.
_interface_options = {
    "fn": text_to_speech,
    "inputs": "text",
    "outputs": "audio",
    "title": "Text-to-Speech",
    "description": "Enter the text and listen to the generated speech.",
    "theme": "huggingface",
}

# Build the interface at import time so `iface` is available to importers.
iface = gr.Interface(**_interface_options)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()