# Spaces: Sleeping
import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import pipeline
# Text-to-speech pipeline and the dataset of speaker x-vectors, both loaded
# once at start-up and shared by every request.
synthesiser = pipeline(
    task="text-to-speech",
    model="microsoft/speecht5_tts",
)
embeddings_dataset = load_dataset(
    path="Matthijs/cmu-arctic-xvectors",
    split="validation",
)
def synthesize_speech(text, speaker_id):
    """Synthesize *text* with the chosen speaker and return a WAV file path.

    Args:
        text: The text to convert to speech.
        speaker_id: Row index into ``embeddings_dataset`` selecting the
            speaker x-vector. Floats are accepted and truncated to an
            integer, because UI sliders may deliver float values.

    Returns:
        Path of a newly written WAV file containing the synthesized audio.
    """
    # The dataset is indexed by row number, which must be an integer; the
    # slider that feeds this function can hand over a float such as 12.0.
    speaker_index = int(speaker_id)
    xvector = embeddings_dataset[speaker_index]["xvector"]

    # unsqueeze(0) adds a leading dimension of size 1 in front of the vector.
    speaker_embedding = torch.tensor(xvector).unsqueeze(0)
    speech = synthesiser(
        text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )

    # Write to a unique file per call so concurrent requests cannot
    # overwrite each other's audio (a fixed "speech.wav" would be shared).
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name
    sf.write(output_path, speech["audio"], samplerate=speech["sampling_rate"])
    return output_path
# Define the interface: a text box for the input text and a slider that picks
# which speaker embedding to use; the output is rendered as an audio player.
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=[
        gr.Textbox(label="Enter your text"),
        # step=1 restricts the slider to whole numbers, since the value is
        # used as a row index into embeddings_dataset.
        gr.Slider(
            minimum=0,
            maximum=len(embeddings_dataset) - 1,
            step=1,
            label="Speaker ID",
        ),
    ],
    outputs="audio",
    title="Text-to-Speech Synthesizer",
    description="Type a text and choose a speaker to synthesize speech.",
)

# Launch the app
iface.launch()