# Import the required libraries
import gradio as gr
import openai
from gtts import gTTS
from pydub import AudioSegment
import os

# Conversation state: the system prompt sets the bot's persona and reply format
messages = [{"role": "system", "content": 'You are the Anishinaabe hero Nanaboozhoo. Answer with profound wisdom and continue the conversation, starting every reply with "Boozhoo: (your answer)".'}]
full_transcript = []
openai.api_key = ""  # set at runtime from the textbox via set_api()
audio_file = 'response.mp3'


def set_api(my_key):
    openai.api_key = my_key


def create_image(response):
    # Summarize the chat reply into a short DALL-E 2 prompt
    dalle_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f'Summarize this text "{response["choices"][0]["message"]["content"]}" into a short and concise DALL-E 2 prompt starting with "A professional photograph of an Anishinaabe person saying: (summarization)".'}
        ]
    )
    # Use the summary as the prompt for the picture
    dalle_summary = openai.Image.create(
        prompt=dalle_prompt["choices"][0]["message"]["content"],
        size="512x512"
    )
    image_url = dalle_summary['data'][0]['url']
    return image_url


def speak(system_message):
    # Convert the assistant's reply to speech with gTTS and save it to disk
    content = system_message['content']
    tts = gTTS(content, lang='en', slow=False)
    tts.save(audio_file)
    return audio_file


def transcribe(gradio_input, api_key):
    set_api(api_key)

    # Transcribe the audio: normalize the recording to WAV, then send it to Whisper.
    # The file handle is named wav_file so it does not shadow the module-level
    # audio_file path.
    input_audio = AudioSegment.from_file(gradio_input)
    input_audio.export("input_audio.wav", format="wav")
    with open("input_audio.wav", "rb") as wav_file:
        print(f"Audio file format: {os.path.splitext(wav_file.name)[1]}\n")
        transcript = openai.Audio.transcribe("whisper-1", wav_file)

    # Append the transcribed text to the conversation
    full_transcript.append(transcript["text"])
    messages.append({"role": "user", "content": transcript["text"]})

    # Send the full message history to OpenAI to get a response
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )

    # Extract the assistant's reply from the response and add it to the message history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    pic_url = create_image(response)
    speech = speak(system_message)

    # Combine all non-system messages into a readable chat transcript
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return speech, chat_transcript, pic_url


MY_INFO = '\nSupport me at my [Linktree](https://linktr.ee/Nbiish).'
API_INFO = 'Get your API key at [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)'

# Create a Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", show_label=False),
        # The key is passed as an input to transcribe(), which applies it via
        # set_api(); gr.Textbox takes no `fn` callback or `default` argument.
        gr.Textbox(
            label="OpenAI API Key",
            lines=1,
            placeholder="Enter your OpenAI API key",
            type="password",
        ),
    ],
    outputs=[
        gr.Audio(show_label=False),
        gr.Textbox(label="Transcript:"),
        gr.Image(show_label=False),
    ],
    title="Boozhoo Bot",
    description=f"""
    Anishinaabe Chatbot

    Uses OpenAI's Whisper to transcribe audio input,
    GPT-3.5 Turbo to generate a response,
    DALL-E 2 to generate an image,
    and gTTS to speak the reply.

    1) Record to get started
    2) Press the X near the recording to keep going
    3) Refresh the page to restart
    {MY_INFO}
    {API_INFO}
    """,
)

if __name__ == "__main__":
    demo.queue().launch()
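
# ---------------------------------------------------------------------------
# A rough local-run sketch (assumptions, not verified against this repo): the
# script uses the legacy pre-1.0 openai SDK (openai.ChatCompletion,
# openai.Image, and openai.Audio were removed in openai>=1.0) and the Gradio
# 3.x API (gr.Audio(source=...) became sources=[...] in Gradio 4), so pinning
# both is likely needed. pydub also expects ffmpeg to be available on PATH
# for most audio formats.
#
#   pip install "openai<1" "gradio<4" gTTS pydub
#   python app.py
#
# Then open the printed local URL, record a question, and the app returns the
# spoken reply, the running transcript, and a generated image.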