Vaibhav Srivastav committed on
Commit
142fdc7
1 Parent(s): 33e0dc8
Files changed (1) hide show
  1. app.py +41 -10
app.py CHANGED
@@ -3,14 +3,18 @@ from TTS.api import TTS
3
 
4
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
5
 
6
- def predict(prompt, language, audio_file_pth):
7
 
8
- tts.tts_to_file(text=prompt,
9
- file_path="output.wav",
10
- speaker_wav=audio_file_pth,
11
- language=language)
 
 
 
12
 
13
- return gr.make_waveform(audio="output.wav",)
 
 
14
 
15
 
16
  title = "XTTS: MVP"
@@ -18,12 +22,39 @@ title = "XTTS: MVP"
18
  gr.Interface(
19
  fn=predict,
20
  inputs=[
21
- gr.Textbox(label="Prompt", info = "One or two sentences at a time is better* (max: 10)", placeholder = "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",),
22
- gr.Dropdown(choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cz", "ar", "zh"], max_choices=1),
23
- gr.Audio(label="Upload Speaker WAV", type="filepath"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ],
25
  outputs=[
26
  gr.Video(label="Synthesised Speech"),
27
  ],
28
  title=title,
29
- ).launch(debug=True)
 
3
 
4
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
5
 
 
6
 
7
+ def predict(prompt, language, audio_file_pth):
8
+ tts.tts_to_file(
9
+ text=prompt,
10
+ file_path="output.wav",
11
+ speaker_wav=audio_file_pth,
12
+ language=language,
13
+ )
14
 
15
+ return gr.make_waveform(
16
+ audio="output.wav",
17
+ )
18
 
19
 
20
  title = "XTTS: MVP"
 
22
  gr.Interface(
23
  fn=predict,
24
  inputs=[
25
+ gr.Textbox(
26
+ label="Text Prompt",
27
+ info="One or two sentences at a time is better",
28
+ placeholder="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
29
+ ),
30
+ gr.Dropdown(
31
+ label="Language",
32
+ info="Select an output language for the synthesised speech",
33
+ choices=[
34
+ "en",
35
+ "es",
36
+ "fr",
37
+ "de",
38
+ "it",
39
+ "pt",
40
+ "pl",
41
+ "tr",
42
+ "ru",
43
+ "nl",
44
+ "cz",
45
+ "ar",
46
+ "zh",
47
+ ],
48
+ max_choices=1,
49
+ ),
50
+ gr.Audio(
51
+ label="Reference Audio",
52
+ info="Upload a reference audio for target speaker voice",
53
+ type="filepath",
54
+ ),
55
  ],
56
  outputs=[
57
  gr.Video(label="Synthesised Speech"),
58
  ],
59
  title=title,
60
+ ).launch(debug=True)