Spaces:

pablo-sampaio
/

futeboy

Running

App Files Files Community

pablo-sampaio committed on Jun 8

Commit

62fbd81

•

1 Parent(s): 82b1a2e

The audio is played without saving an intermediate file

Browse files

Files changed (1) hide show

app.py +38 -30

app.py CHANGED Viewed

@@ -1,35 +1,31 @@
 import os
 import gradio as gr
 from openai import OpenAI
 import google.generativeai as genai
 from match_info_crawler import get_matches_info
-LOAD_KEYS_FROM_FILES = False
 USE_LOCAL_ASR_PIPELINE = False
-if LOAD_KEYS_FROM_FILES:
-    # Load OpenAI API key
-    with open('KEY_OPENAI', 'r') as file:
-        OPENAI_API_KEY = file.read().replace('\n', '')
-    # Hugging Face API key, used for the serverless access to ASR model
-    with open('KEY_HF', 'r') as file:
-        os.environ['HUGGINGFACE_API_KEY'] = file.read().replace('\n', '')
-    # Google AI API key, to access the Gemini model
-    with open('KEY_GOOGLE_AI', 'r') as file:
-        GOOGLE_API_KEY = file.read().replace('\n', '')
-else:
-    # testar se existe a variável OPENAI_API_KEY
-    OPENAI_API_KEY = "" if 'OPENAI_API_KEY' not in os.environ else os.environ['OPENAI_API_KEY']
-    GOOGLE_API_KEY = "" if 'GOOGLE_API_KEY' not in os.environ else os.environ['GOOGLE_API_KEY']
-    assert 'HUGGINGFACE_API_KEY' in os.environ, "Hugging Face API key not found in environment variables"
-GOOGLE_API_KEY = ""
 USE_OPENAI_FOR_CHAT = (GOOGLE_API_KEY == "")
 OPENAI_CLIENT = None
@@ -98,7 +94,7 @@ TOOLS_SPECIFICATION_OPENAI = [
         "type": "function",
         "function": {
             "name": "get_matches_info",
-            "description": "Use this function to retrieve information about matches from the most important leagues, with the time given in Brazilian timezone.",
                            #+ "Returns a string with one match per line; or empty string if the service is not available now.",
             "parameters": {
                 "type": "object",
@@ -114,7 +110,26 @@ TOOLS_SPECIFICATION_OPENAI = [
     }
 ]
-def respond(system_prompt, user_message, chat_history, temperature, voice="echo"):
         if USE_OPENAI_FOR_CHAT:
             openai_history = to_openai_chat_history(system_prompt, chat_history, user_message)
@@ -146,10 +161,6 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
                                system_instruction=system_prompt,
                                tools=[get_matches_info],
                                generation_config=GOOGLE_GEN_CONFIG)
-            # funcionava assim antes de usar tools
-            #google_history = to_google_history(chat_history, user_message)
-            #bot_response = model.generate_content(google_history, generation_config=GOOGLE_GEN_CONFIG, tools=[get_matches_info])
             google_history = to_google_history(chat_history)
             chat = model.start_chat(history=google_history,
@@ -161,18 +172,15 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
         # salva o audio
         response = OPENAI_CLIENT.audio.speech.create(
             model="tts-1",
-            voice=voice,
-            input=assistant_msg
         )
-        output_audio_file = f"{AUDIO_OUT_FILE_PREFIX}-{len(chat_history)+1:03}.wav"
-        #response.stream_to_file(output_audio_file)
-        response.write_to_file(output_audio_file)
         # adiciona ao chat, com o tipo de dado esperado pelo Gradio
         chat_history.append( (user_message, assistant_msg) )
-        return "", chat_history, output_audio_file
 def reset_and_apply(voice):

 import os
+import io
+import wave
+import numpy as np
 import gradio as gr
 from openai import OpenAI
 import google.generativeai as genai
+from dotenv import load_dotenv, find_dotenv
+load_dotenv(find_dotenv())
 from match_info_crawler import get_matches_info
 USE_LOCAL_ASR_PIPELINE = False
+# used for chat, if provided
+GOOGLE_API_KEY = "" #if 'GOOGLE_API_KEY' not in os.environ else os.environ['GOOGLE_API_KEY']
+# used for chat (2nd option) and for text-to-speech
+OPENAI_API_KEY = "" if 'OPENAI_API_KEY' not in os.environ else os.environ['OPENAI_API_KEY']
+# used for speech recognition, if USE_LOCAL_ASR_PIPELINE is true
+assert 'HUGGINGFACE_API_KEY' in os.environ, "Hugging Face API key not found in environment variables"
 USE_OPENAI_FOR_CHAT = (GOOGLE_API_KEY == "")
 OPENAI_CLIENT = None
         "type": "function",
         "function": {
             "name": "get_matches_info",
+            "description": "Use this function to retrieve information about football (soccer) matches from the most important leagues. Time of the matches is given in Brazilian timezone.",
                            #+ "Returns a string with one match per line; or empty string if the service is not available now.",
             "parameters": {
                 "type": "object",
     }
 ]
def process_wave(audio_bytes):
    """Decode in-memory WAV bytes into a ``(frame_rate, samples)`` tuple.

    Args:
        audio_bytes: The complete contents of a PCM WAV file, as ``bytes``.

    Returns:
        A tuple ``(frame_rate, audio_array)`` where ``frame_rate`` is the
        sample rate in Hz read from the WAV header and ``audio_array`` is a
        NumPy array of the decoded samples. Mono audio yields a 1-D array;
        multi-channel audio yields shape ``(frames, channels)``.

    Raises:
        ValueError: If the sample width is not 1, 2 or 4 bytes (for example
            24-bit PCM, which has no matching NumPy integer dtype).
        wave.Error, EOFError: Propagated from ``wave.open`` when the bytes
            are not a readable WAV file.
    """
    # WAV sample width in bytes -> NumPy dtype. 8-bit PCM is unsigned,
    # wider PCM is signed.
    sample_dtypes = {1: np.uint8, 2: np.int16, 4: np.int32}

    # The context manager guarantees the reader is closed even when the
    # header is malformed and one of the getters raises.
    with wave.open(io.BytesIO(audio_bytes), "rb") as wave_file:
        num_channels = wave_file.getnchannels()
        sample_width = wave_file.getsampwidth()
        frame_rate = wave_file.getframerate()
        raw_frames = wave_file.readframes(wave_file.getnframes())

    if sample_width not in sample_dtypes:
        raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")

    # A truncated payload can end with a partial frame; drop it so
    # np.frombuffer does not fail on a length that is not a whole number
    # of frames.
    frame_size = sample_width * num_channels
    whole_length = len(raw_frames) - len(raw_frames) % frame_size
    audio_array = np.frombuffer(raw_frames[:whole_length],
                                dtype=sample_dtypes[sample_width])

    # Frames are stored interleaved (ch0, ch1, ch0, ch1, ...); expose one
    # column per channel instead of a flat interleaved vector.
    if num_channels > 1:
        audio_array = audio_array.reshape(-1, num_channels)

    return (frame_rate, audio_array)
+def respond(system_prompt, user_message, chat_history, temperature, persona="echo"):
         if USE_OPENAI_FOR_CHAT:
             openai_history = to_openai_chat_history(system_prompt, chat_history, user_message)
                                system_instruction=system_prompt,
                                tools=[get_matches_info],
                                generation_config=GOOGLE_GEN_CONFIG)
             google_history = to_google_history(chat_history)
             chat = model.start_chat(history=google_history,
         # salva o audio
         response = OPENAI_CLIENT.audio.speech.create(
             model="tts-1",
+            voice=persona,
+            input=assistant_msg,
+            response_format='wav'  # se for salvar em arquivo, (acho) pode usar 'mp3'
         )
         # adiciona ao chat, com o tipo de dado esperado pelo Gradio
         chat_history.append( (user_message, assistant_msg) )
+        return "", chat_history, process_wave(response.content)
 def reset_and_apply(voice):