akhaliq HF staff committed on
Commit
d597646
1 Parent(s): de7b8e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ import gradio as gr
4
+
5
# All models and tensors in this script are placed on this device.
device = "cpu"

# The text processor and the Tacotron2 model come from the same torchaudio
# pipeline bundle.
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
processor = bundle.get_text_processor()
tacotron2 = bundle.get_tacotron2()
tacotron2 = tacotron2.to(device)

# Workaround to load model mapped on GPU
# https://stackoverflow.com/a/61840832
# The WaveGlow architecture is created without weights; the checkpoint is then
# downloaded separately so that `map_location` can place its tensors on
# `device`.
waveglow = torch.hub.load(
    "NVIDIA/DeepLearningExamples:torchhub",
    "nvidia_waveglow",
    pretrained=False,
    model_math="fp32",
)
checkpoint = torch.hub.load_state_dict_from_url(
    "https://api.ngc.nvidia.com/v2/models/nvidia/waveglowpyt_fp32/versions/1/files/nvidia_waveglowpyt_fp32_20190306.pth",  # noqa: E501
    map_location=device,
    progress=False,
)

# Remove every "module." occurrence from the parameter names so they match the
# names expected by the freshly constructed model.
state_dict = dict(
    (name.replace("module.", ""), tensor)
    for name, tensor in checkpoint["state_dict"].items()
)
waveglow.load_state_dict(state_dict)

# Strip weight normalisation, move to the target device and switch to
# evaluation mode.
waveglow = waveglow.remove_weightnorm(waveglow).to(device)
waveglow.eval()
29
+
30
def inference(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    The text is converted to model inputs by ``processor``, turned into a
    spectrogram by ``tacotron2`` and then into a waveform by ``waveglow``.

    Args:
        text: The sentence to speak. Must contain at least one
            non-whitespace character.

    Returns:
        Path (str) of a newly created ``.wav`` file holding the audio,
        saved with a sample rate of 22050.

    Raises:
        ValueError: If ``text`` is empty or only whitespace.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Reject blank input up front with a clear message instead of handing
    # zero-length input to the models.
    if not text or not text.strip():
        raise ValueError("text must contain at least one non-whitespace character")

    with torch.inference_mode():
        processed, lengths = processor(text)
        processed = processed.to(device)
        lengths = lengths.to(device)
        spec, _, _ = tacotron2.infer(processed, lengths)

    with torch.no_grad():
        waveforms = waveglow.infer(spec)

    # Write each result to its own file. A single fixed filename would let
    # overlapping requests overwrite one another's audio before it is served.
    # NOTE: the files are not deleted here (delete=False); cleanup is left to
    # the host's temp-directory policy.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # waveforms[0:1] keeps only the first entry along dim 0
    # (presumably the single batch item — TODO confirm).
    torchaudio.save(output_path, waveforms[0:1].cpu(), sample_rate=22050)
    return output_path
45
+
46
# Text shown in the Gradio page: heading, intro paragraph and footer links.
title = "TACOTRON 2"
description = (
    "Gradio demo for TACOTRON 2: The Tacotron 2 model for generating mel "
    "spectrograms from text. To use it, simply add you text or click on one of "
    "the examples to load them. Read more at the links below."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://arxiv.org/abs/1712.05884' target='_blank'>"
    "Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a>"
    " | "
    "<a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>"
    "Github Repo</a></p>"
)

# One example row; each row holds the value for the single text input.
examples = [["life is like a box of chocolates"]]

# Text in, audio file out; build the interface and start serving with the
# request queue enabled.
demo = gr.Interface(
    fn=inference,
    inputs="text",
    outputs=gr.outputs.Audio(type="file"),
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch(enable_queue=True)