mrm8488 committed on
Commit
faec64e
1 Parent(s): 59c4444

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -71
app.py CHANGED
@@ -1,30 +1,31 @@
1
  import gradio as gr
2
  from TTS.api import TTS
3
 
4
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
5
- tts.to("cuda")
6
 
 
 
7
 
8
- def predict(prompt, language, audio_file_pth, agree):
9
- if agree == True:
10
- tts.tts_to_file(
11
- text=prompt,
12
- file_path="output.wav",
13
- speaker_wav=audio_file_pth,
14
- language=language,
15
- )
16
 
17
- return (
18
- gr.make_waveform(
19
- audio="output.wav",
20
- ),
21
- "output.wav",
22
- )
23
- else:
24
- gr.Warning("Please accept the Terms & Condition!")
 
 
 
 
 
 
 
25
 
26
 
27
- title = "Coqui🐸 XTTS"
28
 
29
  description = """
30
  <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
@@ -47,67 +48,20 @@ article = """
47
  </div>
48
  """
49
 
50
- examples = [
51
- [
52
- "Once when I was six years old I saw a magnificent picture.",
53
- "en",
54
- "examples/female.wav",
55
- True,
56
- ],
57
- [
58
- "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image.",
59
- "fr",
60
- "examples/male.wav",
61
- True,
62
- ],
63
- [
64
- "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno.",
65
- "it",
66
- "examples/female.wav",
67
- True,
68
- ],
69
- ]
70
-
71
  gr.Interface(
72
  fn=predict,
73
  inputs=[
74
  gr.Textbox(
75
- label="Text Prompt",
76
- info="One or two sentences at a time is better",
77
- value="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
78
- ),
79
- gr.Dropdown(
80
- label="Language",
81
- info="Select an output language for the synthesised speech",
82
- choices=[
83
- "en",
84
- "es",
85
- "fr",
86
- "de",
87
- "it",
88
- "pt",
89
- "pl",
90
- "tr",
91
- "ru",
92
- "nl",
93
- "cz",
94
- "ar",
95
- "zh-cn",
96
- ],
97
- max_choices=1,
98
- value="en",
99
  ),
100
  gr.Audio(
101
- label="Reference Audio",
102
- info="Click on thebutton to upload your own target speaker audio",
103
  type="filepath",
104
  value="examples/female.wav",
105
  ),
106
- gr.Checkbox(
107
- label="Agree",
108
- value=False,
109
- info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
110
- ),
111
  ],
112
  outputs=[
113
  gr.Video(label="Waveform Visual"),
 
1
  import gradio as gr
2
  from TTS.api import TTS
3
 
4
# Coqui model identifier of the multilingual XTTS v1 checkpoint.
model_id = "tts_models/multilingual/multi-dataset/xtts_v1"
# Device the model is moved to before serving requests.
device = "cuda"

# Instantiate the model from `model_id`. Binding the bare `TTS` class (without
# calling it) would leave `model_id` unused and make `.to(device)` an unbound
# method call on the class rather than on a loaded model.
tts = TTS(model_id)
tts.to(device)
9
 
 
 
 
 
 
 
 
 
10
 
11
def predict(prompt, audio_file_pth, language="es"):
    """Synthesize ``prompt`` using the voice from a reference audio clip.

    Args:
        prompt: Text to be spoken.
        audio_file_pth: Path to the reference speaker audio file.
        language: Language code forwarded to the model. Defaults to ``"es"``
            because this demo exposes no language input; without a binding
            for ``language`` the call below raised ``NameError``.

    Returns:
        A 2-tuple: the result of ``gr.make_waveform`` for the generated
        audio, and the path of the generated file (``"output.wav"``).
    """
    # The synthesized speech is always written to the same file, which is
    # then both visualized and returned directly.
    tts.tts_to_file(
        text=prompt,
        file_path="output.wav",
        speaker_wav=audio_file_pth,
        language=language,
    )

    return (
        gr.make_waveform(
            audio="output.wav",
        ),
        "output.wav",
    )
26
 
27
 
28
# Heading shown at the top of the demo page.
title = "Coqui🐸 XTTS - Spanish Demo"
29
 
30
  description = """
31
  <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
 
48
  </div>
49
  """
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  gr.Interface(
52
  fn=predict,
53
  inputs=[
54
  gr.Textbox(
55
+ label="Texto",
56
+ info="Una o dos frases es suficiente-",
57
+ value="Clibrain es una empresa que desarrolla soluciones basadas en inteligencia artificial en español.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  ),
59
  gr.Audio(
60
+ label="Audio de referencia",
61
+ info="Haz clic en el botón para subir tu propio audio o del hablante objetivo",
62
  type="filepath",
63
  value="examples/female.wav",
64
  ),
 
 
 
 
 
65
  ],
66
  outputs=[
67
  gr.Video(label="Waveform Visual"),