import os
import time
from threading import Thread

import gradio as gr
import nltk
import openai

from assets.char_poses_base64 import (
    CHAR_IDLE_HTML, CHAR_THINKING_HTML, CHAR_TALKING_HTML)
from app_utils import (
    get_chat_history, initialize_knowledge_base,
    text_to_speech_gen, logging, buzz_user)

FUNC_CALL = 0
BUZZ_TIMEOUT = 60

GENERAL_RESPONSE_TRIGGERS = [
    "I don't understand the question.",
    "I don't know",
    "Hello, my name is",
    "mentioned in the context provided",
]

MESSAGES = [{"role": "system", "content": "You are a helpful assistant."}]

LOGGER = logging.getLogger('voice_agent')
AUDIO_HTML = ''

# Uncomment if this is your first run:
# nltk.download('averaged_perceptron_tagger')

conv_model, voice_model = initialize_knowledge_base()


def idle_timer():
    """Buzz the user after BUZZ_TIMEOUT seconds of inactivity."""
    global BUZZ_TIMEOUT
    while True:
        time.sleep(BUZZ_TIMEOUT)
        buzz_user()
        if BUZZ_TIMEOUT == 80:
            time.sleep(BUZZ_TIMEOUT)
            BUZZ_TIMEOUT = 60


def update_img():
    """Alternate the avatar between its talking and thinking poses."""
    global FUNC_CALL
    FUNC_CALL += 1
    if FUNC_CALL % 2 == 0:
        return CHAR_TALKING_HTML
    return CHAR_THINKING_HTML


def get_response(history, audio_input):
    query_type = 'text'
    question = history[-1][0]

    global BUZZ_TIMEOUT
    BUZZ_TIMEOUT = 80

    if not question:
        if audio_input:
            # No text was typed, so transcribe the recorded audio instead.
            query_type = 'audio'
            os.rename(audio_input, audio_input + '.wav')
            with open(audio_input + '.wav', "rb") as audio_file:
                transcript = openai.Audio.transcribe("whisper-1", audio_file)
            question = transcript['text']
        else:
            return None, None

    LOGGER.info("\nquery_type: %s", query_type)
    LOGGER.info("query_text: %s", question)

    if question.lower().strip() == 'hi':
        question = 'hello'

    # First, try to answer from the document knowledge base.
    answer = conv_model.run(question)
    LOGGER.info("\ndocument_response: %s", answer)

    # If the knowledge base can't answer, fall back to the general chat model.
    for trigger in GENERAL_RESPONSE_TRIGGERS:
        if trigger in answer:
            MESSAGES.append({"role": "user", "content": question})
            chat = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=MESSAGES,
                temperature=0.7,
                max_tokens=128,
                stop="\n",
            )
            answer = chat.choices[0].message.content
            MESSAGES.append({"role": "assistant", "content": answer})
            LOGGER.info("general_response: %s", answer)
            break

    AUDIO_HTML = text_to_speech_gen(answer)
    history[-1][1] = answer

    return history, AUDIO_HTML


# buzz_usr_proc = Thread(target=idle_timer)

with gr.Blocks(title="Your Assistance Pal!") as demo:
    with gr.Row():
        output_html = gr.HTML(label="Felix's Voice", value=AUDIO_HTML,
                              visible=False)
        assistant_character = gr.HTML(value=CHAR_IDLE_HTML, show_label=False)
        with gr.Column(scale=0.1):
            chatbot = gr.Chatbot(label='Send a text or a voice input'
                                 ).style(height=285)
            with gr.Row():
                msg = gr.Textbox(placeholder='Write a chat & press Enter.',
                                 show_label=False).style(container=False)
                with gr.Column(scale=0.5):
                    audio_input = gr.Audio(source="microphone", type='filepath',
                                           show_label=False).style(container=False)
                    button = gr.Button(value="Send")

    msg.submit(get_chat_history, [msg, chatbot], [msg, chatbot]
               ).then(update_img, outputs=[assistant_character]
               ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
               ).then(update_img, outputs=[assistant_character])

    button.click(get_chat_history, [msg, chatbot], [msg, chatbot]
                 ).then(update_img, outputs=[assistant_character]
                 ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
                 ).then(update_img, outputs=[assistant_character])

# buzz_usr_proc.start()

demo.launch(debug=False, favicon_path='assets/favicon.png',
            show_api=False, share=False)