Spaces:
Sleeping
Sleeping
Pablo Sampaio
committed on
Commit
•
b2a1255
1
Parent(s):
fd22ae3
Correções
Browse files
app.py
CHANGED
@@ -4,16 +4,19 @@ import gradio as gr
|
|
4 |
from openai import OpenAI
|
5 |
|
6 |
|
7 |
-
|
8 |
|
9 |
-
|
10 |
-
|
11 |
with open('KEY_OPENAI', 'r') as file:
|
12 |
os.environ['OPENAI_API_KEY'] = file.read().replace('\n', '')
|
13 |
-
else:
|
14 |
-
LOAD_SHARED_KEY = False
|
15 |
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
TEMPLATE_SYSTEM_MESSAGE = """Sua função é entreter uma criança com idade entre 6 e 8 anos que adora futebol. Diretrizes para a conversa:
|
19 |
- Seu nome é {NAME}
|
@@ -59,7 +62,7 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
|
|
59 |
input=assistant_msg
|
60 |
)
|
61 |
|
62 |
-
output_audio_file = f"{
|
63 |
#response.stream_to_file(output_audio_file)
|
64 |
response.write_to_file(output_audio_file)
|
65 |
|
@@ -72,7 +75,7 @@ def respond(system_prompt, user_message, chat_history, temperature, voice="echo"
|
|
72 |
def reset_and_apply(openai_key, voice):
|
73 |
global OPENAI_CLIENT
|
74 |
OPENAI_CLIENT = OpenAI(api_key=openai_key)
|
75 |
-
return [("", "Olá, vamos falar de futebol?")],
|
76 |
|
77 |
def reset_openai_client(openai_key):
|
78 |
global OPENAI_CLIENT
|
@@ -83,31 +86,30 @@ def on_voice_change(voice):
|
|
83 |
return TEMPLATE_SYSTEM_MESSAGE.format(NAME=voice.upper(), PERSONALITY=persona_description), persona_temperature
|
84 |
|
85 |
|
86 |
-
|
87 |
|
88 |
-
# With
|
89 |
-
if
|
90 |
from transformers import pipeline
|
91 |
import numpy as np
|
92 |
|
93 |
global ASR_PIPELINE
|
94 |
-
ASR_PIPELINE = pipeline(task="automatic-speech-recognition",
|
95 |
-
|
96 |
-
|
97 |
else:
|
98 |
import requests
|
99 |
-
with open('KEY_HF', 'r') as file:
|
100 |
-
HF_KEY = file.read().replace('\n', '')
|
101 |
-
|
102 |
global ASR_API_URL, ASR_API_HEADERS
|
103 |
|
|
|
|
|
104 |
# Serverless API endpoint for OpenAI's Whisper model
|
105 |
ASR_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
|
106 |
ASR_API_HEADERS = {"Authorization": f"Bearer {HF_KEY}"}
|
107 |
|
108 |
|
109 |
def transcribe(audio_in):
|
110 |
-
if
|
111 |
# sampling rate and audio data
|
112 |
sr, y = audio_in
|
113 |
y2 = y.astype(np.float32)
|
@@ -128,7 +130,6 @@ def transcribe_and_respond(audio_in, system_txtbox, user_msg_txb, *args):
|
|
128 |
user_message = transcribe(audio_in)
|
129 |
outputs = respond(system_txtbox, user_message, *args)
|
130 |
return outputs
|
131 |
-
#return *outputs, audio_in
|
132 |
|
133 |
|
134 |
OPENAI_CLIENT = None
|
@@ -143,10 +144,10 @@ with gr.Blocks() as demo:
|
|
143 |
audio_out = gr.Audio(label="Escute a última mensagem", value=initial_audio, autoplay=True, interactive=False)
|
144 |
|
145 |
user_msg_txb = gr.Textbox(label="Mensagem")
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
|
151 |
submit_btn = gr.Button("Enviar")
|
152 |
|
@@ -154,7 +155,7 @@ with gr.Blocks() as demo:
|
|
154 |
reset_btn = gr.Button("Reiniciar")
|
155 |
|
156 |
with gr.Accordion(label="Configurações",open=False):
|
157 |
-
if
|
158 |
openai_key = gr.Textbox(label="OPENAI API Key", value=os.environ['OPENAI_API_KEY'])
|
159 |
else:
|
160 |
openai_key = gr.Textbox(label="OPENAI API Key", placeholder="Insert your API key here")
|
|
|
4 |
from openai import OpenAI
|
5 |
|
6 |
|
7 |
+
LOAD_KEYS_FROM_FILES = True
|
8 |
|
9 |
+
if LOAD_KEYS_FROM_FILES:
|
10 |
+
# Load OpenAI API key
|
11 |
with open('KEY_OPENAI', 'r') as file:
|
12 |
os.environ['OPENAI_API_KEY'] = file.read().replace('\n', '')
|
|
|
|
|
13 |
|
14 |
+
# Hugging Face API key, used for the serverless access to ASR model
|
15 |
+
with open('KEY_HF', 'r') as file:
|
16 |
+
os.environ['HUGGINGFACE_API_KEY'] = file.read().replace('\n', '')
|
17 |
+
|
18 |
+
|
19 |
+
AUDIO_OUT_FILE_PREFIX = "output" # prefixo do nome do arquivo de áudio .wav
|
20 |
|
21 |
TEMPLATE_SYSTEM_MESSAGE = """Sua função é entreter uma criança com idade entre 6 e 8 anos que adora futebol. Diretrizes para a conversa:
|
22 |
- Seu nome é {NAME}
|
|
|
62 |
input=assistant_msg
|
63 |
)
|
64 |
|
65 |
+
output_audio_file = f"{AUDIO_OUT_FILE_PREFIX}-{len(chat_history)+1:03}.wav"
|
66 |
#response.stream_to_file(output_audio_file)
|
67 |
response.write_to_file(output_audio_file)
|
68 |
|
|
|
75 |
def reset_and_apply(openai_key, voice):
|
76 |
global OPENAI_CLIENT
|
77 |
OPENAI_CLIENT = OpenAI(api_key=openai_key)
|
78 |
+
return [("", "Olá, vamos falar de futebol?")], AUDIO_OUT_FILE_PREFIX + f"-001-{voice}.wav"
|
79 |
|
80 |
def reset_openai_client(openai_key):
|
81 |
global OPENAI_CLIENT
|
|
|
86 |
return TEMPLATE_SYSTEM_MESSAGE.format(NAME=voice.upper(), PERSONALITY=persona_description), persona_temperature
|
87 |
|
88 |
|
89 |
+
USE_LOCAL_ASR_PIPELINE = True
|
90 |
|
91 |
+
# With pipeline (downloaded model)
|
92 |
+
if USE_LOCAL_ASR_PIPELINE:
|
93 |
from transformers import pipeline
|
94 |
import numpy as np
|
95 |
|
96 |
global ASR_PIPELINE
|
97 |
+
ASR_PIPELINE = pipeline(task="automatic-speech-recognition",
|
98 |
+
model="openai/whisper-large-v3")
|
99 |
+
#model="distil-whisper/distil-small.en") # English only
|
100 |
else:
|
101 |
import requests
|
|
|
|
|
|
|
102 |
global ASR_API_URL, ASR_API_HEADERS
|
103 |
|
104 |
+
HF_KEY = os.environ['HUGGINGFACE_API_KEY']
|
105 |
+
|
106 |
# Serverless API endpoint for OpenAI's Whisper model
|
107 |
ASR_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
|
108 |
ASR_API_HEADERS = {"Authorization": f"Bearer {HF_KEY}"}
|
109 |
|
110 |
|
111 |
def transcribe(audio_in):
|
112 |
+
if USE_LOCAL_ASR_PIPELINE:
|
113 |
# sampling rate and audio data
|
114 |
sr, y = audio_in
|
115 |
y2 = y.astype(np.float32)
|
|
|
130 |
user_message = transcribe(audio_in)
|
131 |
outputs = respond(system_txtbox, user_message, *args)
|
132 |
return outputs
|
|
|
133 |
|
134 |
|
135 |
OPENAI_CLIENT = None
|
|
|
144 |
audio_out = gr.Audio(label="Escute a última mensagem", value=initial_audio, autoplay=True, interactive=False)
|
145 |
|
146 |
user_msg_txb = gr.Textbox(label="Mensagem")
|
147 |
+
if USE_LOCAL_ASR_PIPELINE:
|
148 |
+
audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='numpy')
|
149 |
+
else:
|
150 |
+
audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='filepath')
|
151 |
|
152 |
submit_btn = gr.Button("Enviar")
|
153 |
|
|
|
155 |
reset_btn = gr.Button("Reiniciar")
|
156 |
|
157 |
with gr.Accordion(label="Configurações",open=False):
|
158 |
+
if LOAD_KEYS_FROM_FILES:
|
159 |
openai_key = gr.Textbox(label="OPENAI API Key", value=os.environ['OPENAI_API_KEY'])
|
160 |
else:
|
161 |
openai_key = gr.Textbox(label="OPENAI API Key", placeholder="Insert your API key here")
|