Pablo Sampaio committed
Commit b2a1255
1 Parent(s): fd22ae3

Correções (Corrections)

Files changed (1)
  1. app.py +25 -24
app.py CHANGED
@@ -4,16 +4,19 @@ import gradio as gr
 from openai import OpenAI
 
 
-LOAD_SHARED_KEY = True
+LOAD_KEYS_FROM_FILES = True
 
-# Load OpenAI API key, if the file exists
-if LOAD_SHARED_KEY and os.path.exists('KEY_OPENAI'):
+if LOAD_KEYS_FROM_FILES:
+    # Load OpenAI API key
     with open('KEY_OPENAI', 'r') as file:
         os.environ['OPENAI_API_KEY'] = file.read().replace('\n', '')
-else:
-    LOAD_SHARED_KEY = False
 
-AUDIO_OUTPUT_FILE = "output" # prefixo do nome do arquivo de áudio .wav
+    # Hugging Face API key, used for the serverless access to ASR model
+    with open('KEY_HF', 'r') as file:
+        os.environ['HUGGINGFACE_API_KEY'] = file.read().replace('\n', '')
+
+
+AUDIO_OUT_FILE_PREFIX = "output" # prefixo do nome do arquivo de áudio .wav
 
 TEMPLATE_SYSTEM_MESSAGE = """Sua função é entreter uma criança com idade entre 6 e 8 anos que adora futebol. Diretrizes para a conversa:
 - Seu nome é {NAME}
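Note that the new block drops the old os.path.exists guard, so a missing key file now raises FileNotFoundError at startup. A minimal sketch of a guarded variant, for illustration only (the helper name is hypothetical, not part of the commit):

import os

def load_key_into_env(path, env_var):
    # Hypothetical helper: read the key file if it exists, otherwise leave
    # any value already present in the environment untouched.
    if os.path.exists(path):
        with open(path, 'r') as file:
            os.environ[env_var] = file.read().strip()

load_key_into_env('KEY_OPENAI', 'OPENAI_API_KEY')
load_key_into_env('KEY_HF', 'HUGGINGFACE_API_KEY')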
@@ -59,7 +62,7 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
         input=assistant_msg
     )
 
-    output_audio_file = f"{AUDIO_OUTPUT_FILE}-{len(chat_history)+1:03}.wav"
+    output_audio_file = f"{AUDIO_OUT_FILE_PREFIX}-{len(chat_history)+1:03}.wav"
     #response.stream_to_file(output_audio_file)
     response.write_to_file(output_audio_file)
 
@@ -72,7 +75,7 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
 def reset_and_apply(openai_key, voice):
     global OPENAI_CLIENT
     OPENAI_CLIENT = OpenAI(api_key=openai_key)
-    return [("", "Olá, vamos falar de futebol?")], AUDIO_OUTPUT_FILE + f"-001-{voice}.wav"
+    return [("", "Olá, vamos falar de futebol?")], AUDIO_OUT_FILE_PREFIX + f"-001-{voice}.wav"
 
 def reset_openai_client(openai_key):
     global OPENAI_CLIENT
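The two hunks above only rename the prefix of the generated .wav files; the call that produces response is outside the diff. For context, a hedged sketch of how such a prefix is typically used with OpenAI's text-to-speech endpoint (the model name and the speak() wrapper are assumptions, not taken from the commit):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
AUDIO_OUT_FILE_PREFIX = "output"

def speak(assistant_msg, turn, voice="echo"):
    # Assumed TTS call; the commit itself only shows the write_to_file() line.
    response = client.audio.speech.create(model="tts-1", voice=voice, input=assistant_msg)
    output_audio_file = f"{AUDIO_OUT_FILE_PREFIX}-{turn:03}.wav"
    response.write_to_file(output_audio_file)
    return output_audio_file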
@@ -83,31 +86,30 @@ def on_voice_change(voice):
     return TEMPLATE_SYSTEM_MESSAGE.format(NAME=voice.upper(), PERSONALITY=persona_description), persona_temperature
 
 
-USE_ASR_PIPELINE = False
+USE_LOCAL_ASR_PIPELINE = True
 
-# With Pipeline (downloaded model)
-if USE_ASR_PIPELINE:
+# With pipeline (downloaded model)
+if USE_LOCAL_ASR_PIPELINE:
     from transformers import pipeline
     import numpy as np
 
     global ASR_PIPELINE
-    ASR_PIPELINE = pipeline(task="automatic-speech-recognition",
-                            model="distil-whisper/distil-small.en")
-
+    ASR_PIPELINE = pipeline(task="automatic-speech-recognition",
+                            model="openai/whisper-large-v3")
+                            #model="distil-whisper/distil-small.en") # English only
 else:
     import requests
-    with open('KEY_HF', 'r') as file:
-        HF_KEY = file.read().replace('\n', '')
-
     global ASR_API_URL, ASR_API_HEADERS
 
+    HF_KEY = os.environ['HUGGINGFACE_API_KEY']
+
     # Serverless API endpoint for OpenAI's Whisper model
     ASR_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
     ASR_API_HEADERS = {"Authorization": f"Bearer {HF_KEY}"}
 
 
 def transcribe(audio_in):
-    if USE_ASR_PIPELINE:
+    if USE_LOCAL_ASR_PIPELINE:
         # sampling rate and audio data
         sr, y = audio_in
         y2 = y.astype(np.float32)
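The body of transcribe() continues beyond this hunk. A hedged sketch of how the two branches typically finish, reusing the globals defined in the hunk above and assuming the local branch receives a (sampling_rate, numpy_array) pair while the serverless branch receives a file path; the normalization step and the response fields are assumptions, not shown in the commit:

def transcribe(audio_in):
    if USE_LOCAL_ASR_PIPELINE:
        # gr.Audio(type='numpy') delivers (sampling rate, raw samples)
        sr, y = audio_in
        y2 = y.astype(np.float32)
        y2 /= np.max(np.abs(y2))  # normalize to [-1, 1]
        return ASR_PIPELINE({"sampling_rate": sr, "raw": y2})["text"]
    else:
        # gr.Audio(type='filepath') delivers the path of the recorded file
        with open(audio_in, "rb") as f:
            data = f.read()
        response = requests.post(ASR_API_URL, headers=ASR_API_HEADERS, data=data)
        return response.json().get("text", "")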
@@ -128,7 +130,6 @@ def transcribe_and_respond(audio_in, system_txtbox, user_msg_txb, *args):
     user_message = transcribe(audio_in)
     outputs = respond(system_txtbox, user_message, *args)
     return outputs
-    #return *outputs, audio_in
 
 
 OPENAI_CLIENT = None
@@ -143,10 +144,10 @@ with gr.Blocks() as demo:
     audio_out = gr.Audio(label="Escute a última mensagem", value=initial_audio, autoplay=True, interactive=False)
 
     user_msg_txb = gr.Textbox(label="Mensagem")
-    #if USE_ASR_PIPELINE:
-    #    audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='numpy')
-    #else:
-    #    audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='filepath')
+    if USE_LOCAL_ASR_PIPELINE:
+        audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='numpy')
+    else:
+        audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='filepath')
 
     submit_btn = gr.Button("Enviar")
 
@@ -154,7 +155,7 @@
     reset_btn = gr.Button("Reiniciar")
 
     with gr.Accordion(label="Configurações",open=False):
-        if LOAD_SHARED_KEY:
+        if LOAD_KEYS_FROM_FILES:
             openai_key = gr.Textbox(label="OPENAI API Key", value=os.environ['OPENAI_API_KEY'])
         else:
             openai_key = gr.Textbox(label="OPENAI API Key", placeholder="Insert your API key here")
 
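For reference, a minimal standalone sketch of the two input formats that gr.Audio hands to its callback, which is what the type='numpy' / type='filepath' switch above controls (hypothetical demo, not part of the commit):

import gradio as gr

def describe(audio):
    # type='numpy'    -> audio is a (sample_rate, numpy_array) tuple
    # type='filepath' -> audio is the path of the recorded file (str)
    if isinstance(audio, tuple):
        sr, y = audio
        return f"numpy input: {sr} Hz, shape {y.shape}"
    return f"filepath input: {audio}"

with gr.Blocks() as demo:
    mic = gr.Audio(sources=['microphone'], type='numpy')  # try type='filepath' too
    out = gr.Textbox()
    mic.stop_recording(describe, inputs=mic, outputs=out)

demo.launch()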