# Import the required libraries
import gradio as gr
import openai
from gtts import gTTS
from pydub import AudioSegment
import os
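# Note: this script targets the pre-1.0 openai Python SDK
# (openai.ChatCompletion / openai.Audio / openai.Image) and Gradio 3.x.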


messages = [{"role": "system", "content": 'You are the Anishinaabe hero Nanaboozhoo. You answer with profound wisdom, and you continue the conversation by formatting every reply as "Boozhoo: (your answer)".'}]
full_transcript = []
openai.api_key = ""
audio_file = 'response.mp3'


def set_api(my_key):
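    """Store the user-supplied OpenAI API key on the openai module."""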
    openai.api_key = my_key


def create_image(response):
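    """Summarize the latest chat reply into a DALL-E 2 prompt and return the generated image URL."""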
    # Ask GPT-3.5 Turbo to condense the chat reply into a short DALL-E prompt
    dalle_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f'Summarize this text "{response["choices"][0]["message"]["content"]}" into a short and concise Dall-E 2 prompt starting with "A Professional photograph of an Anishinaabe person saying :(summarization)".'}
        ]
    )
    # Use the summary as the prompt for image generation
    image_response = openai.Image.create(
        prompt=dalle_prompt["choices"][0]["message"]["content"],
        size="512x512"
    )
    image_url = image_response['data'][0]['url']
    return image_url


def speak(system_message):
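    """Convert the assistant's reply to speech with gTTS and return the saved mp3 path."""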
    content = system_message['content']
    tts = gTTS(content, lang='en', slow=False)
    tts.save(audio_file)
    return audio_file



def transcribe(gradio_input, api_key):
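    """Transcribe mic input, get a chat reply, and return spoken audio, a transcript, and an image URL."""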
    global messages
    global full_transcript
    set_api(api_key)

    # Transcribe the recorded audio with Whisper
    input_audio = AudioSegment.from_file(gradio_input)
    input_audio.export("input_audio.wav", format="wav")
    with open("input_audio.wav", "rb") as wav_file:
        print(f"Audio file format: {os.path.splitext(wav_file.name)[1]}\n")
        transcript = openai.Audio.transcribe("whisper-1", wav_file)

    # Append the transcribed text to the running transcript and chat history
    full_transcript.append(transcript["text"])
    messages.append({"role": "user", "content": transcript["text"]})

    # Send the full chat history to OpenAI to get a response
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # Extract the assistant's reply and append it to the chat history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    pic_url = create_image(response)
    speech = speak(system_message)

    # Combine all non-system messages into a readable chat transcript
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return speech, chat_transcript, pic_url


MY_INFO = '\nSupport me at my [Linktree](https://linktr.ee/Nbiish).'
API_INFO = 'Get your API key at [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)'


# Create a Gradio interface 
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", show_label=False),
        gr.Textbox(
            label="OpenAI API Key",
            lines=1,
            placeholder="Enter your OpenAI API key",
            type="password",
        ),
    ],
    outputs=[
        gr.Audio(show_label=False),
        gr.Textbox(label="Transcript:"),
        gr.Image(show_label=False),
    ],
    title="Boozhoo Bot",
    description=f"""
    Anishinaabe Chatbot

    Uses OpenAI's Whisper to transcribe audio input,
    GPT-3.5 Turbo to generate a response,
    DALL-E 2 to generate an image,
    and gTTS to speak the response aloud.

    1) Record to get started
    2) Press the X next to the recording to keep going
    3) Refresh the page to restart

    {MY_INFO}
    {API_INFO}
    """,
)


if __name__ == "__main__":
    demo.queue().launch()