|
import os |
|
import time |
|
|
|
import gradio as gr |
|
import openai |
|
from dotenv import load_dotenv, find_dotenv |
|
from simpleaichat import AIChat |
|
|
|
from main import weather, search |
|
from utils.tts import TTS, voices |
|
|
|
# Load the .env file located by find_dotenv() into the environment, then hand
# the OpenAI key to the SDK (the value is None when the variable is unset).
load_dotenv(find_dotenv())
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
|
|
def transcribe(audio_file, state=""):
    """Transcribe a recorded audio file with Whisper and append it to ``state``.

    Args:
        audio_file: Path of the recording to transcribe, or ``None`` when
            nothing was recorded.
        state: Text accumulated from earlier transcriptions.

    Returns:
        A ``(state, state)`` pair holding the accumulated text twice. When
        ``audio_file`` is ``None`` the incoming ``state`` is returned
        unchanged, so callers can always unpack two values.
    """
    # Reject a missing recording first so the delay below is not paid for
    # nothing, and keep the return shape identical to the success path.
    if audio_file is None:
        return state, state

    # NOTE(review): presumably gives the recorder time to finish writing the
    # file before it is opened -- confirm whether this delay is still needed.
    time.sleep(5)

    # Passed to Whisper as its prompt: proper names plus filler words. Each
    # fragment ends with a space so implicit concatenation keeps sentences
    # apart.
    prompt = (
        "The author of this tool is Somto Muotoe. "
        "Friends: Ire Ireoluwa Adedugbe, Biola Aderiye, Jelson, Raj, Akshay. "
        "Umm, let me think like, hmm... Okay, here's what I'm, like, "
        "thinking. "
    )

    with open(audio_file, "rb") as f:
        response = openai.Audio.transcribe("whisper-1", f, prompt=prompt)

    state += response["text"]
    return state, state
|
|
|
|
|
def chat_with_gpt(prompt, ai_state, model):
    """Send ``prompt`` to the chat session and return its reply.

    Args:
        prompt: User message to send.
        ai_state: Existing ``AIChat`` session, or ``None`` to create a new
            one.
        model: Model name used when a new session is created. It is not
            consulted when ``ai_state`` already holds a session.

    Returns:
        A ``(text_response, ai)`` tuple: the reply text and the session
        object to pass back in as ``ai_state`` on the next call.
    """
    if ai_state is None:
        params = {"temperature": 0.0, "max_tokens": 200}
        # Every fragment but the last ends with a space; without it the
        # implicit concatenation glues sentences together
        # ("concise.You can ...").
        system_prompt = (
            "You are a confidante whose response is curt and concise. "
            "You can use tools to give real-time updates on weather and search the internet. "
            "Answer all questions empathetically, and ALWAYS ask follow-up questions. "
            "Do NOT say Confidante in any response. "
            "You must TRUST the provided context to inform your response."
        )
        ai = AIChat(
            params=params, model=model, system=system_prompt, save_messages=True
        )
    else:
        # Reuse the running session as-is.
        ai = ai_state

    tools = [weather, search]
    response = ai(prompt, tools=tools)
    text_response = response["response"]
    # Echo the reply to stdout.
    print(text_response)
    return text_response, ai
|
|
|
|
|
def tts(text, voice_id):
    """Generate speech for ``text`` with the voice identified by ``voice_id``.

    Builds a ``TTS`` object for ``voice_id`` and returns whatever its
    ``generate`` method produces for ``text``.
    """
    return TTS(voice_id).generate(text=text)
|
|
|
|
|
def transcribe_and_chat(audio_file, voice, history, ai_state, model):
    """Run one voice turn: transcribe, get a chat reply, synthesize speech.

    Args:
        audio_file: Path of the recorded audio.
        voice: Key into ``voices`` selecting the TTS voice.
        history: List of ``(user_text, reply)`` tuples; appended to in place.
        ai_state: Chat session from the previous turn, or ``None``.
        model: Model name forwarded to ``chat_with_gpt``.

    Returns:
        ``(history, audio_data, history, ai_state)`` -- the history appears
        twice, followed by the updated chat session.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        raise gr.Error("Empty audio file.")

    selected_voice_id = voices[voice]

    # Only the first element of the transcription result is used here.
    user_text, _ = transcribe(audio_file)
    reply, ai_state = chat_with_gpt(user_text, ai_state, model)
    speech = tts(reply, selected_voice_id)

    history.append((user_text, reply))
    return history, speech, history, ai_state
|
|
|
|
|
def clear_chat(history):
    """Empty the conversation history in place and return it.

    Args:
        history: List of conversation entries to clear.

    Returns:
        The same list object, now empty.
    """
    history.clear()
    # No chat request is issued here: this function only receives the
    # history, so it cannot replace the caller's chat session. Resetting the
    # session is the caller's job (pass ``None`` as the session next turn).
    return history
|
|
|
|
|
# Build the Gradio interface. Components are created in display order.
with gr.Blocks(title="JARVIS") as demo:
    gr.Markdown(
        "# Talk with GPT-4! You can get real-time weather updates, and can search Google."
    )

    # Microphone capture, delivered to the handler as a file path.
    audio_input = gr.Audio(source="microphone", type="filepath", visible=True)
    gr.ClearButton(audio_input)

    gr.Markdown(
        "Choose the AI model to use for generating responses. GPT-4 is slower "
        "but more accurate, while GPT-3.5-turbo-16k is faster but less accurate."
    )
    model_select = gr.Radio(
        label="Model",
        choices=["gpt-4", "gpt-3.5-turbo-16k"],
        value="gpt-3.5-turbo-16k",
    )

    voice_names = list(voices.keys())
    voice_select = gr.Radio(label="Voice", choices=voice_names, value="Bella")

    # State: the running (user, reply) history and the chat session object.
    history = gr.State(label="History", value=[])
    ai_state = gr.State(label="AIChat", value=None)

    chat_box = gr.Chatbot(label="Response")
    response_audio = gr.Audio(label="Response Audio", autoplay=True)
    # NOTE(review): only chat_box is listed here; the history and ai_state
    # states are not among the components this button clears.
    gr.ClearButton(chat_box, value="Clear Chat")

    # Run a full turn when the user stops recording.
    audio_input.stop_recording(
        transcribe_and_chat,
        inputs=[audio_input, voice_select, history, ai_state, model_select],
        outputs=[chat_box, response_audio, history, ai_state],
    )
    # NOTE(review): called without a handler or components -- confirm what
    # this line is meant to register.
    audio_input.clear()

demo.launch()
|
|