# Import the required libraries
import gradio as gr
import openai
from gtts import gTTS
from pydub import AudioSegment
import os
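# Note: this script targets the pre-1.0 openai Python SDK
# (openai.ChatCompletion / openai.Audio / openai.Image) and Gradio 3.x.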


messages = [{"role": "system", "content": 'You are the Anishinaabe hero Nanaboozhoo. You answer with profound wisdom, and you continue the conversation by formatting every reply as "Boozhoo: (your answer)".'}]
full_transcript = []
openai.api_key = ""
audio_file = 'response.mp3'


def set_api(my_key):
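    """Store the user-supplied OpenAI API key on the openai module."""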
    openai.api_key = my_key


def create_image(response):
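    """Summarize the latest chat reply into a DALL-E 2 prompt and return the generated image URL."""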
    # Ask GPT-3.5 Turbo to condense the chat reply into a short DALL-E prompt
    dalle_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f'Summarize this text "{response["choices"][0]["message"]["content"]}" into a short and concise Dall-E 2 prompt starting with "A Professional photograph of an Anishinaabe person saying :(summarization)".'}
        ]
    )
    # Use the summary as the prompt for image generation
    image_response = openai.Image.create(
        prompt=dalle_prompt["choices"][0]["message"]["content"],
        size="512x512"
    )
    image_url = image_response['data'][0]['url']
    return image_url


def speak(system_message):
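    """Convert the assistant's reply to speech with gTTS and return the saved mp3 path."""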
    content = system_message['content']
    tts = gTTS(content, lang='en', slow=False)
    tts.save(audio_file)
    return audio_file



def transcribe(gradio_input, api_key):
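    """Transcribe mic input, get a chat reply, and return spoken audio, a transcript, and an image URL."""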
    global messages
    global full_transcript
    set_api(api_key)

    # Transcribe the recorded audio with Whisper
    input_audio = AudioSegment.from_file(gradio_input)
    input_audio.export("input_audio.wav", format="wav")
    with open("input_audio.wav", "rb") as wav_file:
        print(f"Audio file format: {os.path.splitext(wav_file.name)[1]}\n")
        transcript = openai.Audio.transcribe("whisper-1", wav_file)

    # Append the transcribed text to the running transcript and chat history
    full_transcript.append(transcript["text"])
    messages.append({"role": "user", "content": transcript["text"]})

    # Send the full chat history to OpenAI to get a response
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # Extract the assistant's reply and append it to the chat history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    pic_url = create_image(response)
    speech = speak(system_message)

    # Combine all non-system messages into a readable chat transcript
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return speech, chat_transcript, pic_url


MY_INFO = '\nSupport me at my [Linktree](https://linktr.ee/Nbiish).'
API_INFO = 'Get your API key at [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)'


# Create a Gradio interface 
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", show_label=False),
        gr.Textbox(
            label="OpenAI API Key",
            lines=1,
            placeholder="Enter your OpenAI API key",
            type="password",
        ),
    ],
    outputs=[
        gr.Audio(show_label=False),
        gr.Textbox(label="Transcript:"),
        gr.Image(show_label=False),
    ],
    title="Boozhoo Bot",
    description=f"""
    Anishinaabe Chatbot

    Uses OpenAI's Whisper to transcribe audio input,
    GPT-3.5 Turbo to generate a response,
    DALL-E 2 to generate an image,
    and gTTS to speak the response aloud.

    1) Record to get started
    2) Press the X next to the recording to keep going
    3) Refresh the page to restart

    {MY_INFO}
    {API_INFO}
    """,
)


if __name__ == "__main__":
    demo.queue().launch()