Spaces:
ysharma
/
Build error

ysharma HF staff reach-vb HF staff committed on
Commit
c23024f
0 Parent(s):

Duplicate from coqui/xtts

Browse files

Co-authored-by: Vaibhav Srivastav <reach-vb@users.noreply.huggingface.co>

Files changed (7) hide show
  1. .gitattributes +36 -0
  2. README.md +15 -0
  3. app.py +239 -0
  4. examples/.DS_Store +0 -0
  5. examples/female.wav +3 -0
  6. examples/male.wav +3 -0
  7. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/female.wav filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: XTTS
3
+ emoji: 🐸
4
+ colorFrom: green
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.44.2
8
+ app_file: app.py
9
+ pinned: false
10
+ models:
11
+ - coqui/XTTS-v1
12
+ duplicated_from: coqui/xtts
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+
4
+ import gradio as gr
5
+ from TTS.api import TTS
6
+
7
# Load the XTTS v1 checkpoint once at import time so every request reuses
# the same model instance.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")

# Prefer the GPU, but fall back to CPU instead of crashing at startup when
# no CUDA device is present (e.g. a Space running on CPU-only hardware).
import torch

tts.to("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesise ``prompt`` in the voice of a reference recording.

    Args:
        prompt: Text to speak; must be at least two characters long.
        language: Language code forwarded to ``tts.tts_to_file``.
        audio_file_pth: Path of the uploaded reference audio, used when
            ``use_mic`` is false.
        mic_file_path: Path of the microphone recording, used when
            ``use_mic`` is true.
        use_mic: Whether the microphone recording is the reference voice.
        agree: Whether the user accepted the license terms.

    Returns:
        A ``(waveform_video, "output.wav")`` tuple on success, or
        ``(None, None)`` after a ``gr.Warning`` when the request is rejected.

    Raises:
        RuntimeError: Re-raised from the TTS call unless its message contains
            "device-assert", in which case the process exits via ``sys.exit``.
    """
    rejected = (None, None)

    if not agree:
        gr.Warning("Please accept the Terms & Condition!")
        return rejected

    if use_mic:
        if mic_file_path is None:
            gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
            return rejected
        speaker_wav = mic_file_path
    else:
        if audio_file_pth is None:
            # The reference clip can be cleared in the UI; without it there is
            # no voice to clone, so stop before calling the model.
            gr.Warning("Please provide a reference audio, or check Use Microphone and record your voice")
            return rejected
        speaker_wav = audio_file_pth

    if prompt is None or len(prompt) < 2:
        gr.Warning("Please give a longer prompt text")
        return rejected

    # NOTE(review): the output path is fixed, so every request writes the same
    # file — confirm the queue never runs two syntheses concurrently.
    try:
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" not in str(e):
            raise
        # cannot do anything on cuda device side error, need to restart
        gr.Warning("Unhandled Exception encounter, please retry in a minute")
        print("Cuda device-assert Runtime encountered need restart")
        sys.exit("Exit due to cuda device-assert")

    return (
        gr.make_waveform(
            audio="output.wav",
        ),
        "output.wav",
    )
59
+
60
+
61
# Heading displayed at the top of the demo page.
title = "Coqui🐸 XTTS"

# HTML introduction rendered beneath the heading.
description = """
<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
<br/>
XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
<br/>
This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
<br/>
Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
<br/>
<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
<br/>
<a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""

# HTML footer carrying the license notice.
article = """
<div style='margin:20px auto;'>
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
</div>
"""

# One (text, language code, reference clip) triple per sample row. Every
# sample uses the uploaded clip rather than the microphone and has the
# license box pre-ticked, so those three trailing values are filled in below.
_example_rows = (
    ("Once when I was six years old I saw a magnificent picture", "en", "examples/female.wav"),
    ("Lorsque j'avais six ans j'ai vu, une fois, une magnifique image", "fr", "examples/male.wav"),
    ("Als ich sechs war, sah ich einmal ein wunderbares Bild", "de", "examples/female.wav"),
    ("Cuando tenía seis años, vi una vez una imagen magnífica", "es", "examples/male.wav"),
    ("Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica", "pt", "examples/female.wav"),
    ("Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek", "pl", "examples/male.wav"),
    ("Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno", "it", "examples/female.wav"),
    ("Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm", "tr", "examples/female.wav"),
    ("Когда мне было шесть лет, я увидел однажды удивительную картинку", "ru", "examples/female.wav"),
    ("Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat", "nl", "examples/male.wav"),
    ("Když mi bylo šest let, viděl jsem jednou nádherný obrázek", "cs", "examples/female.wav"),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", "examples/female.wav"),
)

# Column order matches predict(): prompt, language, uploaded clip,
# microphone clip, use-microphone flag, agree flag.
examples = [
    [sample_text, sample_lang, sample_wav, None, False, True]
    for sample_text, sample_lang, sample_wav in _example_rows
]

# Input widgets, created in the same order predict() receives its arguments.
prompt_box = gr.Textbox(
    label="Text Prompt",
    info="One or two sentences at a time is better",
    value="Hi there, I'm your new voice clone. Try your best to upload quality audio. Like my grandma used to say: 'Garbage in, garbage out!'",
)
language_menu = gr.Dropdown(
    label="Language",
    info="Select an output language for the synthesised speech",
    choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn"],
    max_choices=1,
    value="en",
)
reference_upload = gr.Audio(
    label="Reference Audio",
    info="Click on the ✎ button to upload your own target speaker audio",
    type="filepath",
    value="examples/female.wav",
)
reference_mic = gr.Audio(
    source="microphone",
    type="filepath",
    info="Use your microphone to record audio",
    label="Use Microphone for Reference",
)
use_mic_box = gr.Checkbox(label="Check to use Microphone as Reference", value=False)
agree_box = gr.Checkbox(
    label="Agree",
    value=False,
    info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
)

# Output widgets: the waveform video first, then the generated audio file,
# matching the tuple predict() returns.
waveform_view = gr.Video(label="Waveform Visual")
audio_result = gr.Audio(label="Synthesised Audio")

demo = gr.Interface(
    fn=predict,
    inputs=[
        prompt_box,
        language_menu,
        reference_upload,
        reference_mic,
        use_mic_box,
        agree_box,
    ],
    outputs=[waveform_view, audio_result],
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Enable request queuing, then start the server with debug output on.
demo.queue().launch(debug=True)
examples/.DS_Store ADDED
Binary file (6.15 kB). View file
 
examples/female.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
3
+ size 1002030
examples/male.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937c74afad004937e00d1687c68e02210e0c5d93ac072a7c8aeb9ab573517bb1
3
+ size 762126
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ TTS==0.17.1
2
+ gradio==3.41.2