Maoweicao commited on
Commit
ca34e42
1 Parent(s): a6a6a22
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ffmpeg filter=lfs diff=lfs merge=lfs -text
37
+ *.wav filter=lfs diff=lfs merge=lfs -text
38
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
39
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: Xttsv2
3
- emoji: 🌖
4
  colorFrom: green
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.3.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: XTTS
3
+ emoji: 🐸
4
  colorFrom: green
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.44.3
8
  app_file: app.py
9
  pinned: false
10
+ models:
11
+ - coqui/XTTS-v1
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os,stat
3
+ import subprocess
4
+ import random
5
+ from zipfile import ZipFile
6
+ import uuid
7
+
8
+ # By using XTTS you agree to CPML license https://coqui.ai/cpml
9
+ os.environ["COQUI_TOS_AGREED"] = "1"
10
+
11
+ # langid is used to detect language for longer text
12
+ # Most users expect the text to be in their own language; there is a checkbox to disable this detection
13
+ import langid
14
+
15
+ import gradio as gr
16
+ from TTS.api import TTS
17
+ from TTS.tts.configs.xtts_config import XttsConfig
18
+ from TTS.tts.models.xtts import Xtts
19
+ from TTS.utils.generic_utils import get_user_data_dir
20
# Token used to authenticate Hub API calls; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
from huggingface_hub import HfApi

# The API client is used to restart the Space on an unrecoverable error.
api = HfApi(token=HF_TOKEN)
# Hugging Face Spaces expose their own "owner/name" id through SPACE_ID, so a
# duplicated Space restarts itself rather than the upstream one. The original
# hard-coded id is kept as the fallback when the variable is absent.
repo_id = os.environ.get("SPACE_ID", "coqui/xtts")
25
+
26
# Use a newer ffmpeg binary (for Ubuntu 20) so the denoise filter is available
# for microphone input.
print("Export newer ffmpeg binary for denoise filter")
# The context manager closes the archive handle as soon as extraction is done.
with ZipFile("ffmpeg.zip") as ffmpeg_archive:
    ffmpeg_archive.extractall()
print("Make ffmpeg binary executable")
st = os.stat("ffmpeg")
# Add the owner-execute bit while keeping the existing permission bits.
os.chmod("ffmpeg", st.st_mode | stat.S_IEXEC)
32
+
33
+ # Load TTS
34
+ from TTS.utils.manage import ModelManager
35
+ import torch
36
# Download the XTTS v2 model files and remember where they were stored.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
ModelManager().download_model(model_name)
model_path = os.path.join(
    get_user_data_dir("tts"),
    "--".join(model_name.split("/")),
)
print("XTTS downloaded")

# Load the model and move it to the GPU when one is available, else the CPU.
tts = TTS(model_name)
tts.to("cuda" if torch.cuda.is_available() else "cpu")

# Debugging state only: remembers the first request that triggered a CUDA
# device-side assert.
DEVICE_ASSERT_DETECTED = 0
DEVICE_ASSERT_PROMPT = None
DEVICE_ASSERT_LANG = None
50
+
51
# Language codes accepted by predict(); same set as the UI dropdown.
_SUPPORTED_LANGUAGES = (
    "en", "es", "fr", "de", "it", "pt", "pl", "tr",
    "ru", "nl", "cs", "ar", "zh-cn", "ja", "ko", "hu",
)

# Returned whenever a request is rejected: no video, no audio, no reference.
_NO_RESULT = (None, None, None)

# ffmpeg audio filter chain: low-pass/high-pass, then remove silence at the
# end and at the beginning (each via areverse + silenceremove).
_CLEANUP_FILTERS = (
    "lowpass=8000,highpass=75,"
    "areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,"
    "areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,"
)


def _clean_reference_audio(speaker_wav):
    """Filter the reference audio with the bundled ffmpeg binary.

    Returns the path of the filtered file, or ``speaker_wav`` unchanged when
    ffmpeg exits with a non-zero status.
    """
    # The ".wav" suffix lets ffmpeg infer the output format.
    out_filename = speaker_wav + str(uuid.uuid4()) + ".wav"
    # Build the argument list directly instead of splitting a formatted
    # string, so a path containing spaces stays a single argument.
    command = [
        "./ffmpeg", "-y",
        "-i", speaker_wav,
        "-af", _CLEANUP_FILTERS,
        out_filename,
    ]
    try:
        subprocess.run(command, capture_output=False, text=True, check=True)
    except subprocess.CalledProcessError:
        # Command exited with a non-zero code: fall back to the raw input.
        print("Error: failed filtering, use original microphone input")
        return speaker_wav
    print("Filtered microphone input")
    return out_filename


def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup, no_lang_auto_detect, agree,):
    """Synthesise ``prompt`` in the voice of a reference recording.

    Args:
        prompt: Text to synthesise; must be 2 to 200 characters long.
        language: Language code; must be one of the supported codes.
        audio_file_pth: Path of the uploaded reference audio.
        mic_file_path: Path of the microphone recording, or None.
        use_mic: Use the microphone recording instead of the uploaded file.
        voice_cleanup: Run the ffmpeg filter chain on the reference audio.
        no_lang_auto_detect: Skip the detected-vs-chosen language check.
        agree: Whether the user accepted the terms.

    Returns:
        ``(waveform video, "output.wav", reference audio path)`` on success,
        or ``(None, None, None)`` after a warning when the request is rejected.

    Raises:
        RuntimeError: Re-raised when synthesis fails for a reason other than
            a CUDA device-side assert.
    """
    global DEVICE_ASSERT_DETECTED, DEVICE_ASSERT_PROMPT, DEVICE_ASSERT_LANG

    if not agree:
        gr.Warning("Please accept the Terms & Condition!")
        return _NO_RESULT

    if language not in _SUPPORTED_LANGUAGES:
        gr.Warning("Language you put in is not in our Supported Languages, please choose from dropdown")
        return _NO_RESULT

    # strip() is needed because the detected code can carry trailing space.
    language_predicted = langid.classify(prompt)[0].strip()
    # The TTS model expects Chinese as "zh-cn".
    if language_predicted == "zh":
        language_predicted = "zh-cn"
    print(f"Detected language:{language_predicted}, Chosen language:{language}")

    # Only enforce the language check on text longer than 15 characters:
    # short text may be common to several languages. Unchecking language
    # auto-detection in the UI disables the check entirely.
    if len(prompt) > 15 and language_predicted != language and not no_lang_auto_detect:
        gr.Warning("It looks like your text isn’t the language you chose , if you’re sure the text is the same language you chose, please check disable language auto-detection checkbox")
        return _NO_RESULT

    if use_mic:
        if mic_file_path is None:
            gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
            return _NO_RESULT
        speaker_wav = mic_file_path
    else:
        speaker_wav = audio_file_pth

    # Without a reference recording the steps below would fail on a None path.
    if speaker_wav is None:
        gr.Warning("Please upload a reference audio file, or check Use Microphone and record your voice")
        return _NO_RESULT

    # Validate the prompt length before spending time on ffmpeg filtering.
    if len(prompt) < 2:
        gr.Warning("Please give a longer prompt text")
        return _NO_RESULT
    if len(prompt) > 200:
        gr.Warning("Text length limited to 200 characters for this demo, please try shorter text. You can clone this space and edit code for your own usage")
        return _NO_RESULT

    if voice_cleanup:
        speaker_wav = _clean_reference_audio(speaker_wav)

    if DEVICE_ASSERT_DETECTED:
        # Unlikely to be reached, as the space is restarted on the first
        # unrecoverable error.
        print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")

    try:
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            language=language,
            speaker_wav=speaker_wav,
        )
    except RuntimeError as e:
        if "device-side assert" not in str(e):
            print("RuntimeError: non device-side assert error:", str(e))
            raise

        # Nothing can be done about a CUDA device-side error; a restart is needed.
        print(f"Exit due to: Unrecoverable exception caused by language:{language} prompt:{prompt}", flush=True)
        gr.Warning("Unhandled Exception encounter, please retry in a minute")
        print("Cuda device-assert Runtime encountered need restart")
        if not DEVICE_ASSERT_DETECTED:
            DEVICE_ASSERT_DETECTED = 1
            DEVICE_ASSERT_PROMPT = prompt
            DEVICE_ASSERT_LANG = language

        # HF Space specific: this error is unrecoverable, restart the space.
        api.restart_space(repo_id=repo_id)
        # Synthesis failed, so do not hand back an output.wav left over from
        # an earlier request.
        return _NO_RESULT

    return (
        gr.make_waveform(audio="output.wav"),
        "output.wav",
        speaker_wav,
    )
196
+
197
+
198
# Heading of the demo page.
title = "🐸 XTTSv2 - 3秒语音合成,支持中英双语,告别电音!"

# Markdown shown under the title, one entry per line.
description = "\n".join(
    [
        "## <center>🌟 - 只需上传3~10秒语音,支持13种语言,中文能力极大增强!</center>",
        "### <center>🤗 - 使用[Colab笔记本](https://github.com/KevinWang676/Bark-Voice-Cloning)运行;Powered by [Coqui AI](https://coqui.ai/)</center>",
        "### <center>🌊 - 更多精彩应用,尽在[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>",
        "### <center>😺️☘️ - 猫尾草修改版 - coqui xTTS v2</center>",
        "### <center>女声示例 - The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.</center>",
        "### <center>男声示例 - It is a pretty little spot there: a grass plateau, running along by the water's edge, and overhung by willows. </center>",
    ]
)


# HTML footer passed to the interface as its article.
article = (
    "\n"
    "<div style='margin:20px auto;'>\n"
    "<p>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</p>\n"
    "<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘</p>\n"
    "</div>\n"
)
216
# One entry per example: (text, language code, reference audio, voice cleanup).
_EXAMPLE_SPECS = [
    ("Once when I was six years old I saw a magnificent picture", "en", "examples/female.wav", False),
    ("Lorsque j'avais six ans j'ai vu, une fois, une magnifique image", "fr", "examples/male.wav", False),
    ("Als ich sechs war, sah ich einmal ein wunderbares Bild", "de", "examples/female.wav", False),
    ("Cuando tenía seis años, vi una vez una imagen magnífica", "es", "examples/male.wav", False),
    ("Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica", "pt", "examples/female.wav", False),
    ("Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek", "pl", "examples/male.wav", False),
    ("Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno", "it", "examples/female.wav", False),
    ("Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm", "tr", "examples/female.wav", False),
    ("Когда мне было шесть лет, я увидел однажды удивительную картинку", "ru", "examples/female.wav", False),
    ("Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat", "nl", "examples/male.wav", False),
    ("Když mi bylo šest let, viděl jsem jednou nádherný obrázek", "cs", "examples/female.wav", False),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", "examples/female.wav", False),
    ("かつて 六歳のとき、素晴らしい絵を見ました", "ja", "examples/female.wav", True),
    ("한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.", "ko", "examples/female.wav", True),
    ("Egyszer hat éves koromban láttam egy csodálatos képet", "hu", "examples/male.wav", True),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", "examples/xiaoxiao(edgetts).mp3", False),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", "examples/jenny(edgetts).mp3", False),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", "examples/xiaoni(edgetts).mp3", False),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", "examples/hsiaochen(edgetts).mp3", False),
]

# Each row supplies exactly one value per interface input, in this order:
# prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup,
# no_lang_auto_detect, agree. Generating the rows keeps them all the same
# length; the French row previously carried a stray ninth value.
examples = [
    [text, lang, reference, None, False, cleanup, False, True]
    for text, lang, reference, cleanup in _EXAMPLE_SPECS
]
410
+
411
+
412
+
413
# Input components, in the same order as the parameters of predict().
demo_inputs = [
    gr.Textbox(label="想要合成的文本内容", lines=3, placeholder="想说却还没说的 还很多"),
    gr.Dropdown(
        label="请选择文本内容对应的语言",
        choices=[
            "en", "es", "fr", "de", "it", "pt", "pl", "tr",
            "ru", "nl", "cs", "ar", "zh-cn", "ja", "ko", "hu",
        ],
        max_choices=1,
        value="zh-cn",
    ),
    gr.Audio(label="通过文件上传语音", type="filepath", value="examples/female.wav"),
    gr.Audio(
        source="microphone",
        type="filepath",
        label="使用麦克风上传语音",
        info="移动端更稳定,电脑端可能无法上传",
        streaming=True,
    ),
    gr.Checkbox(label="是否使用麦克风上传语音", value=False, info="默认为否"),
    gr.Checkbox(label="是否需要去除背景音", value=False, info="默认为否"),
    gr.Checkbox(label="不使用自动探测语言", value=False, info="勾选此选项则不使用自动探测语言"),
    gr.Checkbox(
        label="使用条款",
        value=True,
        info="我承诺:不会利用此程序生成对个人或组织造成侵害的任何内容",
    ),
]

# Output components, matching the three values returned by predict(); only
# the video is visible.
demo_outputs = [
    gr.Video(label="为您合成的专属音频"),
    gr.Audio(label="Synthesised Audio", visible=False),
    gr.Audio(label="Reference Audio Used", visible=False),
]

demo = gr.Interface(
    fn=predict,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Enable the request queue, then start the server on all network interfaces.
demo.queue().launch(debug=True, show_api=False, server_name="0.0.0.0")
examples/.DS_Store ADDED
Binary file (6.15 kB). View file
 
examples/female.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
3
+ size 1002030
examples/hsiaochen(edgetts).mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90a944fcf4f031f20a0be95e14afbd6e83f84aeb5d3b9316dee493dcb13fcf63
3
+ size 49824
examples/jenny(edgetts).mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e84470e6c0d8357ffcb7632139a204f2857fc6493037dcce16953aca6a5b00
3
+ size 51120
examples/male.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937c74afad004937e00d1687c68e02210e0c5d93ac072a7c8aeb9ab573517bb1
3
+ size 762126
examples/xiaoni(edgetts).mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12dc1fb1113b1ad4c051277479e204b55a01b8179b70bbec9e85514d2341881d
3
+ size 46800
examples/xiaoxiao(edgetts).mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7a4c69615343adc14ae4ac3bdca3eeaa11f6282fad9b4890ec0dbc5cbc5107
3
+ size 55584
ffmpeg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51bbdd978250ff7239d213940ff2c92ea56e7d768e8db98d9cbc4079d82e42dc
3
+ size 78999296
ffmpeg.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c04aa2958762686cf94a3bd1456b4738fd537d19bb0a9b622fc788a5e4ce723
3
+ size 29207056
flagged/Reference Audio Used/3f36a128f046665dafb62f8073ad1e8e20733b83/HoneySelect2 2023-11-12 19-08-54_2023111219272 截取视频-0-100.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b8a75d41d92b0dcde3e2625b66b9325bf597567f854d54ecbc16bdc74c252b0
3
+ size 1986092
flagged/Synthesised Audio/0fde6535955565dd9a58df2d06670006bf93100e/output.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1daec472ffdc4813221dd49f758a64333bf29ae3b970297c5c6c221e61514cd5
3
+ size 104524
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 想要合成的文本内容,请选择文本内容对应的语言,通过文件上传语音,使用麦克风上传语音,是否使用麦克风上传语音,是否需要去除背景音,Do not use language auto-detect,使用条款,为您合成的专属音频,Synthesised Audio,Reference Audio Used,flag,username,timestamp
2
+ 英特尔在近期展示了其最新的封装技术——EMIB和Foveros,可实现封装上的多个芯片并排连接或以3D的方式堆叠在一起,并拿出了集成16GB三星LPDDR5X-7500高频内存的Meteor Lake CPU成品,可提供120GB/s的内存峰值带宽,远高于目前的DDR5-5200与LPDDR5-6400。,zh-cn,/home/maoweicao/xtts/flagged/通过文件上传语音/a7e3b789d5cc48818fb5f2281c52cfb16d942bad/tmplqyi0alf.mp3,,False,True,False,True,/home/maoweicao/xtts/flagged/为您合成的专属音频/7f3de074c567d9ff798565df2096ffd03b72d791/tmptr9aormm.mp4,/home/maoweicao/xtts/flagged/Synthesised Audio/0fde6535955565dd9a58df2d06670006bf93100e/output.wav,/home/maoweicao/xtts/flagged/Reference Audio Used/3f36a128f046665dafb62f8073ad1e8e20733b83/HoneySelect2 2023-11-12 19-08-54_2023111219272 截取视频-0-100.wav,,,2023-11-15 22:43:40.741877
flagged/为您合成的专属音频/7f3de074c567d9ff798565df2096ffd03b72d791/tmptr9aormm.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39ec58b7a260ed7c74fbbd7830ad143cd1b4c2818747c6bc409352b219da300
3
+ size 32847
flagged/通过文件上传语音/a7e3b789d5cc48818fb5f2281c52cfb16d942bad/tmplqyi0alf.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed8c22469ad5fd1688b9e66a01a2a7c4726df26cc48bc95520f4bd60bb1e643
3
+ size 166125
output.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c0daeb888c2715efdff597043dd0968fbee0a81e6ca81e6e6d7a24875b6d8b
3
+ size 224844
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ unzip
requirements.txt ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Preinstall requirements from TTS
2
+ numpy==1.22.0;python_version<="3.10"
3
+ numpy==1.24.3;python_version>"3.10"
4
+ cython==0.29.30
5
+ scipy>=1.11.2
6
+ soundfile==0.12.*
7
+ librosa==0.10.*
8
+ scikit-learn==1.3.0
9
+ numba==0.55.1;python_version<"3.9"
10
+ numba==0.57.0;python_version>="3.9"
11
+ inflect==5.6.*
12
+ tqdm==4.64.*
13
+ anyascii==0.3.*
14
+ pyyaml==6.*
15
+ fsspec==2023.6.0 # <= 2023.9.1 makes aux tests fail
16
+ aiohttp==3.8.*
17
+ packaging==23.1
18
+ # deps for examples
19
+ flask==2.*
20
+ # deps for inference
21
+ pysbd==0.3.4
22
+ # deps for notebooks
23
+ umap-learn==0.5.*
24
+ pandas>=1.4,<2.0
25
+ # deps for training
26
+ matplotlib==3.7.*
27
+ # coqui stack
28
+ trainer
29
+ # config management
30
+ coqpit>=0.0.16
31
+ # chinese g2p deps
32
+ jieba
33
+ pypinyin==0.47.1
34
+ # gruut+supported langs
35
+ gruut[de,es,fr]==2.2.3
36
+ # deps for korean
37
+ jamo
38
+ nltk
39
+ g2pkk>=0.1.1
40
+ # deps for bangla
41
+ bangla
42
+ bnnumerizer
43
+ bnunicodenormalizer
44
+ #deps for tortoise
45
+ k_diffusion
46
+ einops==0.6.*
47
+ transformers==4.33.*
48
+ #deps for bark
49
+ encodec==0.1.*
50
+ # deps for XTTS
51
+ unidecode==1.3.*
52
+ langid
53
+ # Install tts
54
+ TTS==0.17.4
requirments2.txt ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.0.0
2
+ accelerate==0.24.1
3
+ aiofiles==23.2.1
4
+ aiohttp==3.8.6
5
+ aiosignal==1.3.1
6
+ altair==5.1.2
7
+ annotated-types==0.6.0
8
+ anyascii==0.3.2
9
+ anyio==3.7.1
10
+ appdirs==1.4.4
11
+ async-timeout==4.0.3
12
+ attrs==23.1.0
13
+ audioread==3.0.1
14
+ Babel==2.13.1
15
+ bangla==0.0.2
16
+ blinker==1.7.0
17
+ bnnumerizer==0.0.2
18
+ bnunicodenormalizer==0.1.1
19
+ cachetools==5.3.2
20
+ certifi==2022.12.7
21
+ cffi==1.16.0
22
+ charset-normalizer==2.1.1
23
+ clean-fid==0.1.35
24
+ click==8.1.7
25
+ clip-anytorch==2.5.2
26
+ cmake==3.25.0
27
+ colorama==0.4.6
28
+ contourpy==1.2.0
29
+ coqpit==0.0.17
30
+ cutlet==0.3.0
31
+ cycler==0.12.1
32
+ Cython==0.29.30
33
+ dateparser==1.1.8
34
+ decorator==5.1.1
35
+ deepspeed==0.11.1
36
+ docker-pycreds==0.4.0
37
+ docopt==0.6.2
38
+ einops==0.6.1
39
+ encodec==0.1.1
40
+ exceptiongroup==1.1.3
41
+ fastapi==0.104.1
42
+ ffmpy==0.3.1
43
+ filelock==3.9.0
44
+ Flask==2.3.3
45
+ fonttools==4.44.0
46
+ frozenlist==1.4.0
47
+ fsspec==2023.6.0
48
+ ftfy==6.1.1
49
+ fugashi==1.3.0
50
+ g2pkk==0.1.2
51
+ gitdb==4.0.11
52
+ GitPython==3.1.40
53
+ google-auth==2.23.4
54
+ google-auth-oauthlib==1.1.0
55
+ gradio==3.48.0
56
+ gradio_client==0.6.1
57
+ grpcio==1.59.2
58
+ gruut==2.2.3
59
+ gruut-ipa==0.13.0
60
+ gruut-lang-de==2.0.0
61
+ gruut-lang-en==2.0.0
62
+ gruut-lang-es==2.0.0
63
+ gruut-lang-fr==2.0.2
64
+ h11==0.14.0
65
+ hangul-romanize==0.1.0
66
+ hjson==3.1.0
67
+ httpcore==1.0.2
68
+ httpx==0.25.1
69
+ huggingface-hub==0.19.0
70
+ idna==3.4
71
+ imageio==2.32.0
72
+ importlib-resources==6.1.1
73
+ inflect==5.6.0
74
+ itsdangerous==2.1.2
75
+ jaconv==0.3.4
76
+ jamo==0.4.1
77
+ jieba==0.42.1
78
+ Jinja2==3.1.2
79
+ joblib==1.3.2
80
+ jsonlines==1.2.0
81
+ jsonmerge==1.9.2
82
+ jsonschema==4.19.2
83
+ jsonschema-specifications==2023.7.1
84
+ k-diffusion==0.0.16
85
+ kiwisolver==1.4.5
86
+ kornia==0.7.0
87
+ langid==1.1.6
88
+ lazy_loader==0.3
89
+ librosa==0.10.0
90
+ lit==15.0.7
91
+ llvmlite==0.40.1
92
+ Markdown==3.5.1
93
+ markdown-it-py==3.0.0
94
+ MarkupSafe==2.1.3
95
+ matplotlib==3.7.3
96
+ mdurl==0.1.2
97
+ mecab-python3==1.0.6
98
+ mojimoji==0.0.12
99
+ mpmath==1.3.0
100
+ msgpack==1.0.7
101
+ multidict==6.0.4
102
+ networkx==2.8.8
103
+ ninja==1.11.1.1
104
+ nltk==3.8.1
105
+ num2words==0.5.13
106
+ numba==0.57.0
107
+ numpy==1.22.0
108
+ oauthlib==3.2.2
109
+ orjson==3.9.10
110
+ packaging==23.1
111
+ pandas==1.5.3
112
+ Pillow==9.3.0
113
+ platformdirs==4.0.0
114
+ pooch==1.8.0
115
+ protobuf==4.23.4
116
+ psutil==5.9.6
117
+ py-cpuinfo==9.0.0
118
+ pyasn1==0.5.0
119
+ pyasn1-modules==0.3.0
120
+ pycparser==2.21
121
+ pydantic==1.10.13
122
+ pydantic_core==2.10.1
123
+ pydub==0.25.1
124
+ Pygments==2.16.1
125
+ pynndescent==0.5.10
126
+ pyparsing==3.1.1
127
+ pypinyin==0.47.1
128
+ pysbd==0.3.4
129
+ python-crfsuite==0.9.9
130
+ python-dateutil==2.8.2
131
+ python-multipart==0.0.6
132
+ pytz==2023.3.post1
133
+ PyYAML==6.0.1
134
+ referencing==0.30.2
135
+ regex==2023.10.3
136
+ requests==2.28.1
137
+ requests-oauthlib==1.3.1
138
+ resize-right==0.0.2
139
+ rich==13.6.0
140
+ rpds-py==0.12.0
141
+ rsa==4.9
142
+ safetensors==0.4.0
143
+ scikit-image==0.22.0
144
+ scikit-learn==1.3.0
145
+ scipy==1.11.3
146
+ semantic-version==2.10.0
147
+ sentry-sdk==1.35.0
148
+ setproctitle==1.3.3
149
+ shellingham==1.5.4
150
+ six==1.16.0
151
+ smmap==5.0.1
152
+ sniffio==1.3.0
153
+ soundfile==0.12.1
154
+ soxr==0.3.7
155
+ starlette==0.27.0
156
+ sympy==1.12
157
+ tensorboard==2.15.1
158
+ tensorboard-data-server==0.7.2
159
+ threadpoolctl==3.2.0
160
+ tifffile==2023.9.26
161
+ tokenizers==0.13.3
162
+ tomlkit==0.12.0
163
+ toolz==0.12.0
164
+ torch==1.13.1+cu117
165
+ torchaudio==0.13.1+cu117
166
+ torchdiffeq==0.2.3
167
+ torchsde==0.2.6
168
+ torchvision==0.14.1+cu117
169
+ tqdm==4.64.1
170
+ trainer==0.0.31
171
+ trampoline==0.1.2
172
+ transformers==4.33.3
173
+ triton==2.0.0
174
+ TTS @ git+https://github.com/coqui-ai/tts.git@46d9c27212939aa54b22f9df842c753de67b1f34
175
+ typer==0.9.0
176
+ typing_extensions==4.8.0
177
+ tzlocal==5.2
178
+ umap-learn==0.5.1
179
+ Unidecode==1.3.7
180
+ unidic-lite==1.0.8
181
+ urllib3==1.26.13
182
+ uvicorn==0.24.0.post1
183
+ wandb==0.16.0
184
+ wcwidth==0.2.9
185
+ websockets==11.0.3
186
+ Werkzeug==3.0.1
187
+ yarl==1.9.2