Richie-O3 committed on
Commit
a640dcc
1 Parent(s): c380b7b

add coqui xtts option

Browse files
Files changed (2) hide show
  1. app.py +7 -4
  2. backend_functions.py +56 -32
app.py CHANGED
@@ -20,7 +20,10 @@ with gr.Blocks() as main_app:
20
  chat = gr.Chatbot(label="Chatbot Crunchyroll")
21
  output_video = gr.Video(interactive=False, label='Video', autoplay=True, height=400)
22
 
23
- output_audio = gr.Audio(interactive=False, label='Audio', autoplay=False)
 
 
 
24
 
25
  messages = gr.State([])
26
 
@@ -35,7 +38,7 @@ with gr.Blocks() as main_app:
35
  with gr.Tab('Times'):
36
 
37
  columns = ["User Message", "Chatbot Response", "Standalone Question", "Create Embedding", "Query Pinecone",
38
- "Context Prompt", "Final Response GPT", "Create Clean Message", "Create Audio Elevenlabs", "Create Video D-iD", "Final Time"]
39
  table_times = gr.DataFrame(headers=columns, visible=False, interactive=False)
40
 
41
  with gr.Column():
@@ -46,7 +49,7 @@ with gr.Blocks() as main_app:
46
 
47
  text.submit(
48
  fn=get_answer,
49
- inputs=[text, chat, messages, output_audio, output_video, table_times],
50
  outputs=[chat, output_audio, output_video, table_times]
51
  ).then(
52
  lambda: None, None, [text]
@@ -58,7 +61,7 @@ with gr.Blocks() as main_app:
58
 
59
  button_text.click(
60
  fn=get_answer,
61
- inputs=[text, chat, messages, output_audio, output_video, table_times],
62
  outputs=[chat, output_audio, output_video, table_times]
63
  ).then(
64
  lambda: None, None, [text]
 
20
  chat = gr.Chatbot(label="Chatbot Crunchyroll")
21
  output_video = gr.Video(interactive=False, label='Video', autoplay=True, height=400)
22
 
23
+ with gr.Column():
24
+ with gr.Row():
25
+ options_audio = gr.Radio(["XTTS", "Elevenlabs"], label="Audio Generation")
26
+ output_audio = gr.Audio(interactive=False, label='Audio', autoplay=False)
27
 
28
  messages = gr.State([])
29
 
 
38
  with gr.Tab('Times'):
39
 
40
  columns = ["User Message", "Chatbot Response", "Standalone Question", "Create Embedding", "Query Pinecone",
41
+ "Context Prompt", "Final Response GPT", "Create Clean Message", "Create Audio", "Create Video", "Final Time"]
42
  table_times = gr.DataFrame(headers=columns, visible=False, interactive=False)
43
 
44
  with gr.Column():
 
49
 
50
  text.submit(
51
  fn=get_answer,
52
+ inputs=[text, chat, messages, output_audio, output_video, table_times, options_audio],
53
  outputs=[chat, output_audio, output_video, table_times]
54
  ).then(
55
  lambda: None, None, [text]
 
61
 
62
  button_text.click(
63
  fn=get_answer,
64
+ inputs=[text, chat, messages, output_audio, output_video, table_times, options_audio],
65
  outputs=[chat, output_audio, output_video, table_times]
66
  ).then(
67
  lambda: None, None, [text]
backend_functions.py CHANGED
@@ -15,6 +15,8 @@ from pymongo.mongo_client import MongoClient
15
  from utils import create_folders
16
  from gcp import download_credentials
17
  from csv import writer
 
 
18
  from dotenv import load_dotenv
19
  load_dotenv()
20
 
@@ -38,6 +40,7 @@ IMG_XAVY = os.getenv("IMG_XAVY")
38
  CREDENTIALS_GCP = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
39
  NAME_BUCKET = os.getenv("NAME_BUCKET")
40
 
 
41
 
42
  # Chat
43
  openai_client = OpenAI(api_key=OPENAI_API_KEY)
@@ -155,6 +158,8 @@ def _get_standalone_question(question, history_messages):
155
  print(e)
156
 
157
  prompt_standalone = file_prompt_standalone.replace('HISTORY', history).replace('QUESTION', question)
 
 
158
  standalone_msg_q = _call_gpt_standalone(prompt_standalone)
159
  print(standalone_msg_q)
160
  print("------------------")
@@ -167,47 +172,66 @@ def _create_clean_message(text: str):
167
  return clean_answer
168
 
169
 
170
- def _create_audio(clean_text: str):
171
  download_credentials()
172
  create_folders()
173
 
174
  STORAGE_CLIENT = storage.Client.from_service_account_json(CREDENTIALS_GCP)
175
 
176
  unique_id = str(uuid.uuid4())
 
177
 
178
- # Create audio file
179
- client_elevenlabs = ElevenLabs(api_key=API_KEY_ELEVENLABS)
180
- voice_custom = Voice(voice_id = "ZQe5CZNOzWyzPSCn5a3c")
 
181
 
182
- audio = client_elevenlabs.generate(
183
- text=clean_text,
184
- voice=voice_custom,
185
- model="eleven_multilingual_v2"
186
- )
187
 
188
- source_audio_file_name = f'./audios/file_audio_{unique_id}.wav'
189
 
190
- try:
191
- save(audio, source_audio_file_name)
192
- except Exception as e:
193
- print(e)
194
 
195
- # Save audio and get url of gcp
196
- destination_blob_name_audio = unique_id + '.wav'
197
-
198
- bucket = STORAGE_CLIENT.bucket(NAME_BUCKET)
199
- blob = bucket.blob(destination_blob_name_audio)
200
- try:
201
- blob.upload_from_filename(source_audio_file_name)
202
- except Exception as e:
203
- print(e)
204
 
205
- signed_url_audio = "None"
206
- try:
207
- url_expiration = timedelta(minutes=15)
208
- signed_url_audio = blob.generate_signed_url(expiration=url_expiration)
209
- except Exception as e:
210
- print(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  return signed_url_audio, unique_id
213
 
@@ -296,7 +320,7 @@ def _create_video(link_audio: str, unique_id: str):
296
  return signed_url_video
297
 
298
 
299
- def get_answer(question: str, chatbot: list[tuple[str, str]], history_messages, comp_audio, comp_video, df_table):
300
  """
301
  Gets the answer of the chatbot
302
  """
@@ -338,7 +362,7 @@ def get_answer(question: str, chatbot: list[tuple[str, str]], history_messages,
338
  time_create_clean_message = end_create_clean_message - start_create_clean_message
339
 
340
  start_create_audio = time.time()
341
- url_audio, unique_id = _create_audio(processed_message) # create audio with elevenlabs
342
  end_create_audio = time.time()
343
  time_create_audio = end_create_audio - start_create_audio
344
 
@@ -372,7 +396,7 @@ def get_answer(question: str, chatbot: list[tuple[str, str]], history_messages,
372
 
373
  def init_greeting(chatbot, history_messages):
374
  if len(chatbot) == 0:
375
- greeting = ('Hola 👋, soy Roll, tu asistente de recomendación de series y películas animadas en Crunchyroll. ¿En qué puedo ayudarte hoy?')
376
  history_messages.append({'role': 'assistant', 'content': greeting})
377
  chatbot.append([None, greeting])
378
 
 
15
  from utils import create_folders
16
  from gcp import download_credentials
17
  from csv import writer
18
+ import asyncio
19
+ import httpx
20
  from dotenv import load_dotenv
21
  load_dotenv()
22
 
 
40
  CREDENTIALS_GCP = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
41
  NAME_BUCKET = os.getenv("NAME_BUCKET")
42
 
43
+ URL_AUDIO = os.getenv("URL_AUDIO")
44
 
45
  # Chat
46
  openai_client = OpenAI(api_key=OPENAI_API_KEY)
 
158
  print(e)
159
 
160
  prompt_standalone = file_prompt_standalone.replace('HISTORY', history).replace('QUESTION', question)
161
+ print(prompt_standalone)
162
+ print("------------------")
163
  standalone_msg_q = _call_gpt_standalone(prompt_standalone)
164
  print(standalone_msg_q)
165
  print("------------------")
 
172
  return clean_answer
173
 
174
 
175
async def _create_audio(clean_text: str, option_audio: str):
    """
    Creates the audio of the answer and returns a link to it

    :param clean_text: text that will be converted to speech
    :param option_audio: audio backend to use, "Elevenlabs" or "XTTS"
    :return: tuple (signed_url_audio, unique_id). signed_url_audio is the
        string "None" whenever the audio could not be created; unique_id is a
        fresh uuid4 string generated on every call
    """
    download_credentials()
    create_folders()

    storage_client = storage.Client.from_service_account_json(CREDENTIALS_GCP)

    unique_id = str(uuid.uuid4())
    # The string "None" (not the None object) is the failure sentinel that is
    # returned to the caller when no audio link could be produced
    signed_url_audio = "None"

    if option_audio == "Elevenlabs":
        # Create audio file with elevenlabs
        client_elevenlabs = ElevenLabs(api_key=API_KEY_ELEVENLABS)
        voice_custom = Voice(voice_id="ZQe5CZNOzWyzPSCn5a3c")

        audio = client_elevenlabs.generate(
            text=clean_text,
            voice=voice_custom,
            model="eleven_multilingual_v2"
        )

        source_audio_file_name = f'./audios/file_audio_{unique_id}.wav'

        try:
            save(audio, source_audio_file_name)
        except Exception as e:
            print(e)
            # Without a local file there is nothing to upload or to sign
            return signed_url_audio, unique_id

        # Save audio and get url of gcp
        destination_blob_name_audio = unique_id + '.wav'

        bucket = storage_client.bucket(NAME_BUCKET)
        blob = bucket.blob(destination_blob_name_audio)
        try:
            blob.upload_from_filename(source_audio_file_name)
        except Exception as e:
            print(e)
            # Signing a blob that was never uploaded would hand the caller a
            # link to a missing object, so the sentinel is returned instead
            return signed_url_audio, unique_id

        try:
            url_expiration = timedelta(minutes=15)
            signed_url_audio = blob.generate_signed_url(expiration=url_expiration)
        except Exception as e:
            print(e)

    elif option_audio == "XTTS":
        params = {'text': clean_text, 'language': 'es'}
        headers = {'accept': 'application/json'}

        # Makes a request to the instance with the audio api
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(URL_AUDIO, params=params, headers=headers, timeout=120)
            except Exception as e:
                print(f'There is a problem with the audio. Check that instance. ERROR: {e}')
                # No response object exists when the request itself failed, so
                # stop here instead of reading response.status_code below
                return signed_url_audio, unique_id

        # Check if everything was successful
        if response.status_code == 200:
            try:
                signed_url_audio = response.json()['link_audio']
            except (ValueError, KeyError, TypeError) as e:
                # Body was not JSON or did not contain the expected key
                print(f'There is a problem with the audio. Check that instance. ERROR: {e}')
        else:
            print(f'There is a problem with the audio. Check that instance. ERROR: {response.status_code}')

    else:
        # Reached, for instance, when no option was selected and the value is None
        print(f'Unknown audio option: {option_audio!r}. No audio was created.')

    return signed_url_audio, unique_id
237
 
 
320
  return signed_url_video
321
 
322
 
323
+ def get_answer(question: str, chatbot: list[tuple[str, str]], history_messages, comp_audio, comp_video, df_table, option_audio):
324
  """
325
  Gets the answer of the chatbot
326
  """
 
362
  time_create_clean_message = end_create_clean_message - start_create_clean_message
363
 
364
  start_create_audio = time.time()
365
+ url_audio, unique_id = asyncio.run(_create_audio(processed_message, option_audio)) # create audio
366
  end_create_audio = time.time()
367
  time_create_audio = end_create_audio - start_create_audio
368
 
 
396
 
397
  def init_greeting(chatbot, history_messages):
398
  if len(chatbot) == 0:
399
+ greeting = ('Hola 👋, soy tu asistente de recomendación de series y películas animadas en Crunchyroll. ¿En qué puedo ayudarte hoy?')
400
  history_messages.append({'role': 'assistant', 'content': greeting})
401
  chatbot.append([None, greeting])
402