# Import the required libraries
import gradio as gr
import openai
from gtts import gTTS
from pydub import AudioSegment
import os
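# NOTE: this script assumes the pre-1.0 OpenAI Python SDK (openai<1.0, which
# provides openai.ChatCompletion / openai.Image / openai.Audio) and Gradio 3.x
# (which provides gr.Audio(source=...)). pydub additionally needs ffmpeg on
# PATH to decode non-WAV recordings.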
messages = [{"role": "system", "content": 'You are the Anishinaabe hero Nanaboozhoo. Not only do you answer with profound wisdom but you will continue the conversation by answering like this, Boozhoo: (your answer)'}]
full_transcript = []
openai.api_key = ""
audio_file = 'response.mp3'
def set_api(my_key):
    openai.api_key = my_key
def create_image(response):
    # Ask GPT-3.5 to summarize the chat response into a short DALL-E prompt
    dalle_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f'Summarize this text "{response["choices"][0]["message"]["content"]}" into a short and concise Dall-E 2 prompt starting with "A Professional photograph of an Anishinaabe person saying :(summarization)".'}
        ]
    )
    # Use the summary as the prompt for image generation
    dalle_summary = openai.Image.create(
        prompt=dalle_prompt["choices"][0]["message"]["content"],
        size="512x512"
    )
    image_url = dalle_summary['data'][0]['url']
    return image_url
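# For reference, in the pre-1.0 SDK openai.Image.create returns a dict of the
# form {"created": ..., "data": [{"url": "https://..."}]}, which is why the
# image URL is read from dalle_summary['data'][0]['url'] above.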
def speak(system_message):
    # Convert the assistant's reply to speech and save it as an MP3
    content = system_message['content']
    tts = gTTS(content, lang='en', slow=False)
    tts.save(audio_file)
    return audio_file
def transcribe(gradio_input, api_key):
    global messages
    global full_transcript
    set_api(api_key)
    # Convert the Gradio recording to WAV and transcribe it with Whisper.
    # The file handle is named wav_file so it doesn't shadow the module-level
    # audio_file path used by speak().
    input_audio = AudioSegment.from_file(gradio_input)
    input_audio.export("input_audio.wav", format="wav")
    with open("input_audio.wav", "rb") as wav_file:
        print(f"Audio file format: {os.path.splitext(wav_file.name)[1]}\n")
        transcript = openai.Audio.transcribe("whisper-1", wav_file)
    # Append the transcription to the running transcript and message history
    full_transcript.append(transcript["text"])
    messages.append({"role": "user", "content": transcript["text"]})
    # Send the full message history to OpenAI to get a response
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # Extract the assistant's reply from the response and append it to the message history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    pic_url = create_image(response)
    speech = speak(system_message)
    # Combine all non-system messages into a chat transcript
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
    return speech, chat_transcript, pic_url
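# A minimal sketch of exercising transcribe() outside Gradio, assuming a local
# recording at "sample.wav" (hypothetical path) and a key exported in the
# OPENAI_API_KEY environment variable:
#
#   speech_path, chat_log, image_url = transcribe(
#       "sample.wav", os.environ["OPENAI_API_KEY"]
#   )
#   print(chat_log)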
MY_INFO = '\nSupport me at my [Linktree](https://linktr.ee/Nbiish).'
API_INFO = 'Get your API key at [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)'
# Create a Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", show_label=False),
        # The key is forwarded to transcribe(), which calls set_api();
        # Textbox takes no fn or default parameters in Gradio 3.x
        gr.Textbox(
            label="OpenAI API Key",
            lines=1,
            placeholder="Enter your OpenAI API key",
            type="password",
        ),
    ],
    outputs=[
        gr.Audio(show_label=False),
        gr.Textbox(label="Transcript:"),
        gr.Image(show_label=False),
    ],
title="Boozhoo Bot",
description=f"""
Anishinaabe Chatbot
Applies OpenAI's Whisper to transcribe audio input.
GPT-3.5 Turbo to generate a response.
Dall-E 2.0 to generate an image.
gTTS to generate audio response.
1) Record to get started
2) Press X near recording to keep going
3) Refresh page to restart
{MY_INFO}
{API_INFO}
""",
)
if __name__ == "__main__":
    # queue() enables Gradio's request queue so long-running OpenAI calls
    # aren't cut off by the default request timeout
    demo.queue().launch()