pablo-sampaio commited on
Commit
62fbd81
1 Parent(s): 82b1a2e

The audio is played without saving an intermediate file

Browse files
Files changed (1) hide show
  1. app.py +38 -30
app.py CHANGED
@@ -1,35 +1,31 @@
1
  import os
 
 
2
 
 
3
  import gradio as gr
 
4
  from openai import OpenAI
5
  import google.generativeai as genai
6
 
 
 
 
7
  from match_info_crawler import get_matches_info
8
 
9
 
10
- LOAD_KEYS_FROM_FILES = False
11
  USE_LOCAL_ASR_PIPELINE = False
12
 
13
 
14
- if LOAD_KEYS_FROM_FILES:
15
- # Load OpenAI API key
16
- with open('KEY_OPENAI', 'r') as file:
17
- OPENAI_API_KEY = file.read().replace('\n', '')
18
 
19
- # Hugging Face API key, used for the serverless access to ASR model
20
- with open('KEY_HF', 'r') as file:
21
- os.environ['HUGGINGFACE_API_KEY'] = file.read().replace('\n', '')
22
 
23
- # Google AI API key, to acces Gemini model
24
- with open('KEY_GOOGLE_AI', 'r') as file:
25
- GOOGLE_API_KEY = file.read().replace('\n', '')
26
- else:
27
- # testar se existe a variável OPENAI_API_KEY
28
- OPENAI_API_KEY = "" if 'OPENAI_API_KEY' not in os.environ else os.environ['OPENAI_API_KEY']
29
- GOOGLE_API_KEY = "" if 'GOOGLE_API_KEY' not in os.environ else os.environ['GOOGLE_API_KEY']
30
- assert 'HUGGINGFACE_API_KEY' in os.environ, "Hugging Face API key not found in environment variables"
31
 
32
- GOOGLE_API_KEY = ""
33
  USE_OPENAI_FOR_CHAT = (GOOGLE_API_KEY == "")
34
 
35
  OPENAI_CLIENT = None
@@ -98,7 +94,7 @@ TOOLS_SPECIFICATION_OPENAI = [
98
  "type": "function",
99
  "function": {
100
  "name": "get_matches_info",
101
- "description": "Use this function to retrieve information about matches from the most important leagues, with the time given in Brazilian timezone.",
102
  #+ "Returns a string with one match per line; or empty string if the service is not available now.",
103
  "parameters": {
104
  "type": "object",
@@ -114,7 +110,26 @@ TOOLS_SPECIFICATION_OPENAI = [
114
  }
115
  ]
116
 
117
- def respond(system_prompt, user_message, chat_history, temperature, voice="echo"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  if USE_OPENAI_FOR_CHAT:
119
  openai_history = to_openai_chat_history(system_prompt, chat_history, user_message)
120
 
@@ -146,10 +161,6 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
146
  system_instruction=system_prompt,
147
  tools=[get_matches_info],
148
  generation_config=GOOGLE_GEN_CONFIG)
149
-
150
- # funcionava assim antes de usar tools
151
- #google_history = to_google_history(chat_history, user_message)
152
- #bot_response = model.generate_content(google_history, generation_config=GOOGLE_GEN_CONFIG, tools=[get_matches_info])
153
 
154
  google_history = to_google_history(chat_history)
155
  chat = model.start_chat(history=google_history,
@@ -161,18 +172,15 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
161
  # salva o audio
162
  response = OPENAI_CLIENT.audio.speech.create(
163
  model="tts-1",
164
- voice=voice,
165
- input=assistant_msg
 
166
  )
167
 
168
- output_audio_file = f"{AUDIO_OUT_FILE_PREFIX}-{len(chat_history)+1:03}.wav"
169
- #response.stream_to_file(output_audio_file)
170
- response.write_to_file(output_audio_file)
171
-
172
  # adiciona ao chat, com o tipo de dado esperado pelo Gradio
173
  chat_history.append( (user_message, assistant_msg) )
174
 
175
- return "", chat_history, output_audio_file
176
 
177
 
178
  def reset_and_apply(voice):
 
1
  import os
2
+ import io
3
+ import wave
4
 
5
+ import numpy as np
6
  import gradio as gr
7
+
8
  from openai import OpenAI
9
  import google.generativeai as genai
10
 
11
+ from dotenv import load_dotenv, find_dotenv
12
+ load_dotenv(find_dotenv())
13
+
14
  from match_info_crawler import get_matches_info
15
 
16
 
 
17
  USE_LOCAL_ASR_PIPELINE = False
18
 
19
 
20
+ # used for chat, if provided
21
+ GOOGLE_API_KEY = "" #if 'GOOGLE_API_KEY' not in os.environ else os.environ['GOOGLE_API_KEY']
 
 
22
 
23
+ # used for chat (2nd option) and for text-to-speech
24
+ OPENAI_API_KEY = "" if 'OPENAI_API_KEY' not in os.environ else os.environ['OPENAI_API_KEY']
 
25
 
26
+ # used for speech recognition through the serverless Hugging Face API, i.e. when USE_LOCAL_ASR_PIPELINE is false
27
+ assert 'HUGGINGFACE_API_KEY' in os.environ, "Hugging Face API key not found in environment variables"
 
 
 
 
 
 
28
 
 
29
  USE_OPENAI_FOR_CHAT = (GOOGLE_API_KEY == "")
30
 
31
  OPENAI_CLIENT = None
 
94
  "type": "function",
95
  "function": {
96
  "name": "get_matches_info",
97
+ "description": "Use this function to retrieve information about football (soccer) matches from the most important leagues. Time of the matches is given in Brazilian timezone.",
98
  #+ "Returns a string with one match per line; or empty string if the service is not available now.",
99
  "parameters": {
100
  "type": "object",
 
110
  }
111
  ]
112
 
113
+
114
def process_wave(audio_bytes):
    """Decode in-memory WAV data into a ``(frame_rate, samples)`` tuple.

    The result is returned by ``respond`` as its audio output, so the audio
    can be played without writing an intermediate file (presumably by a
    Gradio audio component -- confirm against the UI wiring).

    Args:
        audio_bytes: Complete contents of a PCM WAV file, as ``bytes``.

    Returns:
        A tuple ``(frame_rate, audio_array)``. ``audio_array`` is a NumPy
        array of samples: one-dimensional for mono input, and shaped
        ``(num_frames, num_channels)`` for multi-channel input.

    Raises:
        wave.Error: If ``audio_bytes`` is not a WAV file the ``wave`` module
            can parse.
        ValueError: If the sample width is not 1, 2 or 4 bytes.
    """
    # PCM WAV stores 8-bit samples unsigned and wider samples signed.
    sample_dtypes = {1: np.uint8, 2: np.int16, 4: np.int32}

    # The context manager releases the reader even if parsing fails midway.
    with wave.open(io.BytesIO(audio_bytes), "rb") as wave_file:
        num_channels = wave_file.getnchannels()
        sample_width = wave_file.getsampwidth()
        frame_rate = wave_file.getframerate()
        raw_frames = wave_file.readframes(wave_file.getnframes())

    if sample_width not in sample_dtypes:
        raise ValueError(
            f"Unsupported WAV sample width: {sample_width} byte(s)"
        )

    # Drop a trailing partial frame (e.g. truncated data) so that
    # np.frombuffer does not fail on a buffer of the wrong size.
    frame_size = sample_width * num_channels
    usable_bytes = len(raw_frames) - len(raw_frames) % frame_size
    audio_array = np.frombuffer(
        raw_frames[:usable_bytes], dtype=sample_dtypes[sample_width]
    )

    # Samples of multi-channel audio are interleaved frame by frame; expose
    # them as one row per frame instead of a flat interleaved sequence.
    if num_channels > 1:
        audio_array = audio_array.reshape(-1, num_channels)

    return (frame_rate, audio_array)
130
+
131
+
132
+ def respond(system_prompt, user_message, chat_history, temperature, persona="echo"):
133
  if USE_OPENAI_FOR_CHAT:
134
  openai_history = to_openai_chat_history(system_prompt, chat_history, user_message)
135
 
 
161
  system_instruction=system_prompt,
162
  tools=[get_matches_info],
163
  generation_config=GOOGLE_GEN_CONFIG)
 
 
 
 
164
 
165
  google_history = to_google_history(chat_history)
166
  chat = model.start_chat(history=google_history,
 
172
  # generate the speech audio (kept in memory; no longer saved to a file)
173
  response = OPENAI_CLIENT.audio.speech.create(
174
  model="tts-1",
175
+ voice=persona,
176
+ input=assistant_msg,
177
+ response_format='wav' # se for salvar em arquivo, (acho) pode usar 'mp3'
178
  )
179
 
 
 
 
 
180
  # adiciona ao chat, com o tipo de dado esperado pelo Gradio
181
  chat_history.append( (user_message, assistant_msg) )
182
 
183
+ return "", chat_history, process_wave(response.content)
184
 
185
 
186
  def reset_and_apply(voice):