Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
from datasets import load_dataset
|
3 |
+
import soundfile as sf
|
4 |
+
import torch
|
5 |
+
import gradio as gr
|
6 |
+
|
7 |
+
# Model setup, done once at import time so every request reuses the same objects.
# The SpeechT5 text-to-speech pipeline performs the actual synthesis.
synthesiser = pipeline(task="text-to-speech", model="microsoft/speecht5_tts")

# Table of speaker x-vectors; a row index into it selects the voice.
embeddings_dataset = load_dataset(
    path="Matthijs/cmu-arctic-xvectors",
    split="validation",
)
|
10 |
+
|
11 |
+
def synthesize_speech(text, speaker_id):
    """Synthesize ``text`` with the chosen speaker and return a WAV file path.

    Args:
        text: The sentence(s) to speak.
        speaker_id: Row index into ``embeddings_dataset`` selecting the
            speaker x-vector. Accepts an int or an integral float (UI
            sliders may hand the value over as a float).

    Returns:
        Path of a newly created ``.wav`` file containing the audio.
    """
    import tempfile

    # Dataset rows must be addressed with an integer index.
    speaker_index = int(speaker_id)
    xvector = embeddings_dataset[speaker_index]["xvector"]
    # unsqueeze(0) adds the leading batch dimension before handing it to the model.
    speaker_embedding = torch.tensor(xvector).unsqueeze(0)

    speech = synthesiser(
        text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )

    # Write each result to its own file: a single shared "speech.wav" would be
    # overwritten by concurrent requests before the previous one is served.
    # NOTE: delete=False means the file outlives this call; cleanup is left to
    # the OS temp directory policy.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name
    sf.write(output_path, speech["audio"], samplerate=speech["sampling_rate"])
    return output_path
|
16 |
+
|
17 |
+
# Build the web UI: a text box plus a speaker selector feeding synthesize_speech,
# with the returned file path rendered as an audio player.
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=[
        gr.Textbox(label="Enter your text"),
        # step=1 keeps the slider on whole-number row indices; without it the
        # step is derived from the range and may skip most speakers.
        gr.Slider(
            minimum=0,
            maximum=len(embeddings_dataset) - 1,
            step=1,
            label="Speaker ID",
        ),
    ],
    outputs="audio",
    title="Text-to-Speech Synthesizer",
    description="Type a text and choose a speaker to synthesize speech.",
)

# Launch the app
iface.launch()
|