Abdelmageed95 committed on
Commit
12cd07f
1 Parent(s): 3a61f05

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+ import torch
4
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
+ import gradio as gr
6
+ import librosa
7
+ import numpy as np
8
+ import torch
9
+
10
+ checkpoint = "microsoft/speecht5_tts"
11
+ processor = SpeechT5Processor.from_pretrained(checkpoint)
12
+ model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
13
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
14
+
15
# Speaker labels offered in the UI, mapped to their speaker-embedding files.
_SPEAKER_EMBEDDING_FILES = {
    "KSP (male)": "cmu_us_ksp_arctic-wav-arctic_b0087.npy",
}
_DEFAULT_SPEAKER = "KSP (male)"
_SAMPLE_RATE = 16000


def predict(text, speaker=None):
    """Synthesize speech for *text* and return it as 16-bit PCM audio.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only text
            produces an empty audio clip instead of calling the model.
        speaker: Label of the speaker selected in the UI. Optional so that
            single-argument callers keep working; an unknown or missing
            label falls back to the default speaker.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 16000
        and ``samples`` is a NumPy ``int16`` array.
    """
    if text is None or not text.strip():
        return (_SAMPLE_RATE, np.zeros(0).astype(np.int16))

    inputs = processor(text=text, return_tensors="pt")

    # Limit input length to what the model's text positions can hold.
    input_ids = inputs["input_ids"]
    input_ids = input_ids[..., :model.config.max_text_positions]

    # The interface sends the selected speaker as a second argument; accept
    # it here so the callback's arity matches the declared inputs.
    embedding_file = _SPEAKER_EMBEDDING_FILES.get(
        speaker, _SPEAKER_EMBEDDING_FILES[_DEFAULT_SPEAKER]
    )
    speaker_embedding = np.load(embedding_file)
    speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)

    speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)

    # Scale the floating-point waveform to the int16 range.
    speech = (speech.numpy() * 32767).astype(np.int16)
    return (_SAMPLE_RATE, speech)
27
+
28
# Metadata shown on the demo page. These names were passed to gr.Interface
# without ever being defined, which raised NameError at startup.
title = "SpeechT5: Text-to-Speech"
description = (
    "Synthesize speech from text with the microsoft/speecht5_tts checkpoint "
    "and the microsoft/speecht5_hifigan vocoder."
)
article = None
# Each example row supplies one value per input component: [text, speaker].
examples = [
    ["Hello, this is a test of the speech synthesis demo.", "KSP (male)"],
]

# Build the web UI around `predict` and start serving it.
gr.Interface(
    fn=predict,
    inputs=[
        gr.Text(label="Input Text"),
        gr.Radio(
            label="Speaker",
            choices=[
                "KSP (male)",
            ],
            value="KSP (male)",
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Speech", type="numpy"),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()