xJuuzouYTx committed on
Commit
6f1ebe2
1 Parent(s): 1b5ebf2

[ADD] elevenlabs tts

Browse files
Files changed (4) hide show
  1. app.py +20 -12
  2. requirements.txt +1 -1
  3. tts/constants.py +1 -1
  4. tts/conversion.py +26 -7
app.py CHANGED
@@ -7,7 +7,7 @@ from utils.model import model_downloader, get_model
7
  import requests
8
  import json
9
  from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
10
- from tts.conversion import tts_infer
11
 
12
  api_url = "https://rvc-models-api.onrender.com/uploadfile/"
13
 
@@ -130,9 +130,11 @@ def search_model(name):
130
 
131
  def update_tts_methods_voice(select_value):
132
  if select_value == "Edge-tts":
133
- return gr.update(choices=EDGE_VOICES)
134
  elif select_value == "Bark-tts":
135
- return gr.update(choices=BARK_VOICES)
 
 
136
 
137
  with gr.Blocks() as app:
138
  gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
@@ -163,19 +165,26 @@ with gr.Blocks() as app:
163
  with gr.Row():
164
  tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
165
 
166
- with gr.Column():
167
- tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=False)
168
- tts_model = gr.Dropdown(choices=EDGE_VOICES, label="Modelo TTS:", visible=True, interactive=True)
169
- tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model])
170
-
 
 
171
  with gr.Row():
172
  tts_vc_output1 = gr.Textbox(label="Salida")
173
  tts_vc_output2 = gr.Audio(label="Audio de salida")
174
 
175
- tts_btn = gr.Button(value="Convertir")
176
- tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model], outputs=[tts_vc_output1, tts_vc_output2])
177
 
178
- with gr.Tab("Recursos"):
 
 
 
 
 
 
179
  gr.HTML("<h4>Buscar modelos</h4>")
180
  search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
181
  # Salida
@@ -198,7 +207,6 @@ with gr.Blocks() as app:
198
  btn_post_model = gr.Button(value="Publicar")
199
  btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
200
 
201
-
202
  # with gr.Column():
203
  # model_voice_path07 = gr.Dropdown(
204
  # label=i18n("RVC Model:"),
 
7
  import requests
8
  import json
9
  from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
10
+ from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES
11
 
12
  api_url = "https://rvc-models-api.onrender.com/uploadfile/"
13
 
 
130
 
131
  def update_tts_methods_voice(select_value):
132
  if select_value == "Edge-tts":
133
+ return gr.update(choices=EDGE_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
134
  elif select_value == "Bark-tts":
135
+ return gr.update(choices=BARK_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
136
+ elif select_value == 'ElevenLabs':
137
+ return gr.update(choices=ELEVENLABS_VOICES_NAMES), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True)
138
 
139
  with gr.Blocks() as app:
140
  gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
 
165
  with gr.Row():
166
  tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
167
 
168
+ with gr.Row():
169
+ tts_method = gr.Dropdown(choices=VOICE_METHODS, value="ElevenLabs", label="Método TTS:", visible=True)
170
+ tts_model = gr.Dropdown(choices=ELEVENLABS_VOICES_NAMES, label="Modelo TTS:", visible=True, interactive=True)
171
+ tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True)
172
+
173
+ tts_btn = gr.Button(value="Convertir")
174
+
175
  with gr.Row():
176
  tts_vc_output1 = gr.Textbox(label="Salida")
177
  tts_vc_output2 = gr.Audio(label="Audio de salida")
178
 
179
+ tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key], outputs=[tts_vc_output1, tts_vc_output2])
 
180
 
181
+ tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
182
+ ![Imgur](https://imgur.com/HH6YTu0.png)
183
+ """, visible=True)
184
+
185
+ tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key])
186
+
187
+ with gr.Tab("Modelos"):
188
  gr.HTML("<h4>Buscar modelos</h4>")
189
  search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
190
  # Salida
 
207
  btn_post_model = gr.Button(value="Publicar")
208
  btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
209
 
 
210
  # with gr.Column():
211
  # model_voice_path07 = gr.Dropdown(
212
  # label=i18n("RVC Model:"),
requirements.txt CHANGED
@@ -27,7 +27,7 @@ Cython==0.29.30
27
  decorator==5.1.1
28
  discord.py==2.3.2
29
  edge-tts==6.1.5
30
- elevenlabs==0.2.21
31
  entrypoints==0.4
32
  exceptiongroup==1.1.3
33
  executing==1.2.0
 
27
  decorator==5.1.1
28
  discord.py==2.3.2
29
  edge-tts==6.1.5
30
+ elevenlabs
31
  entrypoints==0.4
32
  exceptiongroup==1.1.3
33
  executing==1.2.0
tts/constants.py CHANGED
@@ -1,4 +1,4 @@
1
- VOICE_METHODS = ["Edge-tts", "Bark-tts"]
2
 
3
  BARK_VOICES = [
4
  "v2/en_speaker_0-Male",
 
1
+ VOICE_METHODS = ["Edge-tts", "ElevenLabs",]
2
 
3
  BARK_VOICES = [
4
  "v2/en_speaker_0-Male",
tts/conversion.py CHANGED
@@ -7,6 +7,18 @@ from gtts import gTTS
7
  import edge_tts
8
  from inference import Inference
9
  import asyncio
 
 
 
 
 
 
 
 
 
 
 
 
10
  #git+https://github.com/suno-ai/bark.git
11
  # from transformers import AutoProcessor, BarkModel
12
  # import nltk
@@ -50,16 +62,11 @@ def cast_to_device(tensor, device):
50
  # return speech, sampling_rate
51
 
52
 
53
- def tts_infer(tts_text, model_url, tts_method, tts_model):
54
- print("*****************")
55
- print(tts_text)
56
- print(model_url)
57
  if not tts_text:
58
  return 'Primero escribe el texto que quieres convertir.', None
59
  if not tts_model:
60
  return 'Selecciona un modelo TTS antes de convertir.', None
61
- if not model_url:
62
- return 'Escribe la url de modelo que quieres usar antes de convertir.', None
63
 
64
  f0_method = "harvest"
65
  output_folder = "audios"
@@ -94,7 +101,19 @@ def tts_infer(tts_text, model_url, tts_method, tts_model):
94
  tts.save(converted_tts_filename)
95
  print("Error: Audio will be replaced.")
96
  success = False
97
-
 
 
 
 
 
 
 
 
 
 
 
 
98
  # elif tts_method == "Bark-tts":
99
  # try:
100
  # script = tts_text.replace("\n", " ").strip()
 
7
  import edge_tts
8
  from inference import Inference
9
  import asyncio
10
+ from elevenlabs import voices, generate, save
11
+
12
+ ELEVENLABS_VOICES_RAW = voices()
13
+
14
+ def get_elevenlabs_voice_names():
15
+ elevenlabs_voice_names = []
16
+ for voice in ELEVENLABS_VOICES_RAW:
17
+ elevenlabs_voice_names.append(voice.name)
18
+ return elevenlabs_voice_names
19
+
20
+ ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
21
+
22
  #git+https://github.com/suno-ai/bark.git
23
  # from transformers import AutoProcessor, BarkModel
24
  # import nltk
 
62
  # return speech, sampling_rate
63
 
64
 
65
+ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
 
 
 
66
  if not tts_text:
67
  return 'Primero escribe el texto que quieres convertir.', None
68
  if not tts_model:
69
  return 'Selecciona un modelo TTS antes de convertir.', None
 
 
70
 
71
  f0_method = "harvest"
72
  output_folder = "audios"
 
101
  tts.save(converted_tts_filename)
102
  print("Error: Audio will be replaced.")
103
  success = False
104
+ if tts_method == 'ElevenLabs':
105
+ audio = generate(
106
+ text=tts_text,
107
+ voice=tts_model,
108
+ model="eleven_multilingual_v2",
109
+ api_key=tts_api_key
110
+ )
111
+ save(audio=audio, filename=converted_tts_filename)
112
+ success = True
113
+
114
+ if not model_url:
115
+ return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename
116
+
117
  # elif tts_method == "Bark-tts":
118
  # try:
119
  # script = tts_text.replace("\n", " ").strip()