File size: 4,024 Bytes
7d95945
 
 
 
 
 
7f58389
7d95945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import gradio as gr
import openai
from gtts import gTTS # Google Text To Speech

# Load the API key from the environment rather than hard-coding it in
# source (secrets must never be committed). Falls back to the original
# empty string when OPENAI_API_KEY is unset, so behavior is unchanged
# for anyone who was editing the literal in place.
openai.api_key = os.getenv("OPENAI_API_KEY", "")
# Takes an audio file recorded by the Microphone component and submits
# the raw audio to OpenAI Whisper for speech-to-text.
# input from Microphone Component
# output to User Input - Textbox Component
def transcribe(audio):
    """Transcribe the audio file at path *audio* via OpenAI Whisper.

    Parameters
    ----------
    audio : str
        Filesystem path to the recorded audio (Gradio type="filepath").

    Returns
    -------
    str
        The transcribed text.
    """
    # Original opened the file and never closed it; the context manager
    # guarantees the handle is released even if the API call raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]



# Build the Gradio app with the Blocks API; every component registered
# inside this context becomes part of `demo`.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # bienvenido al Chat Bot!
    """
    )
    # Collapsible instructions panel shown above the chat widgets.
    with gr.Accordion("Click for Instructions:"):
            gr.Markdown(
    """
    * Dime en lo que te puedo ayudar
    """)
        
    
    # First message acts as the system instruction to OpenAI.
    # gr.State gives each user (and each page reload) an independent
    # copy of the chat log, seeded with the system prompt (Spanish:
    # "You are a bank teller. Respond briefly.").
    #DGG messages = gr.State(value=[{"role": "system", "content": "You are a therapist. Respond in less than 5 sentences."}])
    messages = gr.State(value=[{"role": "system", "content": "Eres un cajero bancario. Responde brevemente."}])

    # Takes the user's transcribed audio as a string plus the shared
    # messages list, sends the full conversation to OpenAI, and returns
    # the visible chat log as one string.
    # input from User Input - Textbox Component
    # output to Chat Log - Textbox Component
    def botResponse(user_input, messages):
        """Append the user's turn, fetch the model's reply, and render
        the conversation (system turns excluded) as a transcript string.
        """
        # Record the user's turn in the shared log, then request a
        # completion over the entire history so the bot keeps context.
        messages.append({"role": "user", "content": user_input})
        reply = openai.ChatCompletion.create(
          model="gpt-3.5-turbo-0301",
          messages=messages
        )

        # Pull the assistant's text out of the API response and store
        # it back into the log for the next round.
        assistant_text = reply["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": assistant_text})

        # Render every non-system turn as "role: content" paragraphs.
        return "".join(
            m["role"] + ": " + m["content"] + "\n\n"
            for m in messages
            if m["role"] != "system"
        )

    # Converts the newest chat message to speech with gTTS and returns
    # the path of the generated mp3.
    # input from messages as a reference
    # output to GPT Voice - Audio Component
    def giveVoice(messages):
        """Synthesize the last message in *messages* to temp.mp3 and
        return its absolute path for the Audio component to play.
        """
        latest = messages[-1]

        # Render the text to speech and persist it in the working dir.
        speech = gTTS(text=latest["content"])
        speech.save("temp.mp3")

        # Gradio's Audio component wants a concrete filesystem path.
        return os.path.join(os.getcwd(), "temp.mp3")

    # Create the interface widgets. The submit button starts a cascade
    # of events, each engaging a different component as input/output:
    # audio -> transcript -> chat response -> spoken reply.
    with gr.Row():
        with gr.Column(scale=1):
            user_audio = gr.Audio(source="microphone", type="filepath", label="Input Phrase")
            submit_btn = gr.Button(value="Transcribe Audio")
            #DGGsubmit_btn2 = gr.Button(value="Submit Text")
            gpt_voice = gr.Audio(label="Consejero")
        with gr.Column(scale=2):
            user_transcript = gr.Text(label="Audio Translation", interactive=False)
            #DGGuser_text = gr.Text(label="Text Input")
            gpt_transcript = gr.Text(label="Chat Transcript")
    # Button click transcribes; the transcript change triggers the bot;
    # the bot transcript change triggers text-to-speech.
    submit_btn.click(transcribe, user_audio, user_transcript)
    #DGGsubmit_btn2.click(botResponse, [user_text, messages], gpt_transcript)
    user_transcript.change(botResponse, [user_transcript, messages], gpt_transcript)
    gpt_transcript.change(giveVoice, messages, gpt_voice)
    
    
# Start a local web server; set share=True to get a temporary public
# Gradio link instead.
demo.launch(share=False)