import gradio as gr
import openai
import os
from dotenv import load_dotenv
from pydub import AudioSegment
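
# Note: this script assumes the pre-1.0 openai SDK (openai.Audio / openai.ChatCompletion)
# and Gradio 3.x (gr.Audio's `source=` argument); both APIs changed in later releases.
# Pydub also relies on ffmpeg being installed to decode non-WAV audio.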

load_dotenv()

# Access the OpenAI API key from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")

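# Separate conversation histories for the audio and text tabs, each seeded with the same system prompt.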
audio_messages = [{"role": "system", "content": 'You are an AI assistant expert. Respond to all input in precise, crisp and easy to understand language.'}]
text_messages = [{"role": "system", "content": 'You are an AI assistant expert. Respond to all input in precise, crisp and easy to understand language.'}]

"""
It seems like the gr.Audio source is not generating a WAV file, which is required for the openai.Audio.transcribe() method to work. 
To convert the audio file to WAV format, i have used a library like Pydub.
"""

def audio_transcribe(audio):
    # Convert the recorded audio to WAV and send it to the Whisper API.
    audio_file = AudioSegment.from_file(audio)
    audio_file.export("temp.wav", format="wav")
    with open("temp.wav", "rb") as final_audio_file:
        transcript = openai.Audio.transcribe("whisper-1", final_audio_file)
    os.remove("temp.wav")

    # Send the transcribed text to the ChatGPT API for a chat completion.
    audio_messages.append({"role": "user", "content": transcript["text"]})  # type: ignore
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=audio_messages)
    system_message = response["choices"][0]["message"]  # type: ignore
    audio_messages.append(system_message)

    # Build a readable transcript of the conversation, skipping the system prompt.
    chat_transcript = ""
    for message in audio_messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return chat_transcript

def text_transcribe(name):
    # Send the typed input to the ChatGPT API for a chat completion.
    text_messages.append({"role": "user", "content": name})  # type: ignore
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=text_messages)
    system_message = response["choices"][0]["message"]  # type: ignore
    text_messages.append(system_message)

    # Build a readable transcript of the conversation, skipping the system prompt.
    chat_transcript = ""
    for message in text_messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    # Return an empty string as the second output to clear the input textbox.
    return chat_transcript, ""

title = """<h1 align="center">Your ChatGPT AI Assistant at Your Service!! 😎</h1>"""
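# Two-tab UI: one tab for spoken input (Whisper transcription + ChatGPT), one for typed input (ChatGPT only).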
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML(title)
    with gr.Tab("Audio Input"):
        with gr.Row():
            audio_input = gr.Audio(source="microphone", type="filepath", label="Speak Here")
            audio_output = gr.Textbox(label="AI Response", lines=20, placeholder="AI Response will be displayed here...")
        with gr.Row():
            audio_submit_button = gr.Button("Submit")
    with gr.Tab("Text Input"):
        with gr.Row():
            text_input = gr.Textbox(label="Type Here", lines=20, placeholder="Type your message here...")
            text_output = gr.Textbox(label="AI Response", lines=20, placeholder="AI Response will be displayed here...")
        with gr.Row():
            text_submit_button = gr.Button("Submit")
    audio_submit_button.click(fn=audio_transcribe, inputs=audio_input, outputs=audio_output)
    text_submit_button.click(fn=text_transcribe, inputs=text_input, outputs=[text_output, text_input])
    
    gr.Markdown("<center> Made with ❤️ by Tanish Gupta. Credits to 🤗 Spaces for Hosting this App </center>")

demo.launch()