import os

import gradio as gr
import openai
import whisper

# OpenAI key is read from the environment (the HF-space secret is named 'SessionToken').
openai.api_key = os.environ.get('SessionToken')

# Load the Whisper model once at startup; reused for every request.
whisper_model = whisper.load_model("small")

# Running dialogue transcript, shared across calls so the model sees prior turns.
# NOTE(review): module-level state is shared by ALL concurrent users of the demo.
conversation = ""
user_name = "MH"
bot_name = "bbDemo"


def chat_hf(audio):
    """Transcribe `audio` with Whisper, then ask the OpenAI completion API for a reply.

    Parameters
    ----------
    audio : str
        Filepath of the recorded audio clip (Gradio `type="filepath"`).

    Returns
    -------
    tuple[str, str]
        (whisper transcription, chatbot response text).
    """
    # Bug fix: the original rebound a *local* `conversation`, so the
    # "allows for context" accumulation never actually persisted.
    global conversation

    whisper_text = translate(audio)

    prompt = user_name + ": " + whisper_text + "\n" + bot_name + ": "
    conversation += prompt  # accumulate context across turns

    # First attempt with a small token budget; on failure retry once with a
    # larger one (preserves the original try/except fallback, without
    # duplicating the whole pipeline inside a bare `except:`).
    try:
        response = openai.Completion.create(
            engine='text-davinci-003',
            prompt=conversation,
            max_tokens=50,
        )
    except Exception:
        print("Error")
        response = openai.Completion.create(
            engine='text-davinci-003',
            prompt=conversation,
            max_tokens=1024,
        )

    response_str = response["choices"][0]["text"].replace("\n", "")
    # Trim any hallucinated continuation of the dialogue (model speaking for
    # either participant).
    response_str = response_str.split(user_name + ": ", 1)[0].split(bot_name + ": ", 1)[0]

    conversation += response_str + "\n"
    return whisper_text, response_str


def translate(audio):
    """Run Whisper transcription on an audio file and return the text."""
    print("""
    —
    Sending audio to Whisper ...
    —
    """)
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

    # Language detection result is not used directly; `transcription.language`
    # below comes from the decode call.
    _, probs = whisper_model.detect_language(mel)

    transcript_options = whisper.DecodingOptions(task="transcribe", fp16=False)
    transcription = whisper.decode(whisper_model, mel, transcript_options)

    print("language spoken: " + transcription.language)
    print("transcript: " + transcription.text)
    print("———————————————————————————————————————————")

    return transcription.text


title = """

Whisper to chatGPT

Chat with GPT with your voice in your native language!

"""

article = """ """

css = '''
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.footer {
    margin-bottom: 45px;
    margin-top: 35px;
    text-align: center;
    border-bottom: 1px solid #e5e5e5;
}
.footer>p {
    font-size: .8rem;
    display: inline-block;
    padding: 0 10px;
    transform: translateY(10px);
    background: white;
}
.dark .footer {
    border-color: #303030;
}
.dark .footer>p {
    background: #0b0f19;
}
'''

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Row():
            record_input = gr.Audio(source="microphone", type="filepath", show_label=False)
            send_btn = gr.Button("Send my message !")

        with gr.Column():
            audio_translation = gr.Textbox(type="text", label="Whisper transcription")
            gpt_response = gr.Textbox(type="text", label="chatGPT response")

        gr.HTML(article)

        send_btn.click(chat_hf, inputs=[record_input], outputs=[audio_translation, gpt_response])

demo.queue(max_size=32, concurrency_count=20).launch(debug=True)