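# Boozhoo Bot: a voice chatbot that transcribes microphone input with OpenAI Whisper,
# generates a reply with GPT-3.5 Turbo, creates a matching image with DALL-E 2, and
# speaks the reply with gTTS, all served through a Gradio interface.
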
# Import the required libraries
import gradio as gr
import openai
from gtts import gTTS
from pydub import AudioSegment
import os
messages = [{"role": "system", "content": 'You are the Anishinaabe hero Nanaboozhoo. Not only do you answer with profound wisdom but you will continue the conversation by answering like this, Boozhoo: (your answer)'}]
full_transcript = []
openai.api_key = ""
audio_file = 'response.mp3'
def set_api(my_key):
    openai.api_key = my_key
def create_image(response):
    # Send text to be summarized into a short DALL-E 2 prompt
    dalle_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f'Summarize this text "{response["choices"][0]["message"]["content"]}" into a short and concise Dall-E 2 prompt starting with "A Professional photograph of an Anishinaabe person saying :(summarization)".'}
        ]
    )
    # Use the summary as the prompt for the picture
    dalle_summary = openai.Image.create(
        prompt=dalle_prompt["choices"][0]["message"]["content"],
        size="512x512"
    )
    image_url = dalle_summary['data'][0]['url']
    return image_url
def speak(system_message):
    # Convert the assistant's reply to speech and save it for the Gradio audio output
    content = system_message['content']
    tts = gTTS(content, lang='en', slow=False)
    tts.save(audio_file)
    return audio_file
def transcribe(gradio_input, api_key):
    global messages
    global full_transcript
    set_api(api_key)
    # Transcribe the recorded audio with Whisper
    input_audio = AudioSegment.from_file(gradio_input)
    input_audio.export("input_audio.wav", format="wav")
    with open("input_audio.wav", "rb") as wav_file:
        print(f"Audio file format: {os.path.splitext(wav_file.name)[1]}\n")
        transcript = openai.Audio.transcribe("whisper-1", wav_file)
    # Append the transcribed content to the running conversation
    full_transcript.append(transcript["text"])
    messages.append({"role": "user", "content": transcript["text"]})
    # Send the latest set of messages to OpenAI to get a response
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # Extract the latest assistant message from the response and add it to the messages list
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    pic_url = create_image(response)
    speech = speak(system_message)
    # Combine all messages in the messages list to create a chat transcript
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
    return speech, chat_transcript, pic_url
MY_INFO = '\nSupport me at my [Linktree](https://linktr.ee/Nbiish).'
API_INFO = 'Get your api key at [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)'
# Create a Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", show_label=False),
        gr.Textbox(
            label="OpenAI API Key",
            lines=1,
            placeholder="Enter your OpenAI API key",
            type="password",
        ),
    ],
    outputs=[
        gr.Audio(show_label=False),
        gr.Textbox(label="Transcript:"),
        gr.Image(show_label=False),
    ],
    title="Boozhoo Bot",
    description=f"""
Anishinaabe Chatbot
Applies OpenAI's Whisper to transcribe audio input.
GPT-3.5 Turbo to generate a response.
Dall-E 2.0 to generate an image.
gTTS to generate an audio response.
1) Record to get started
2) Press X near the recording to keep going
3) Refresh the page to restart
{MY_INFO}
{API_INFO}
""",
)
if __name__ == "__main__":
    demo.queue().launch()