Spaces:

aiegoo
/

whisper-chatbot-ko

Runtime error

App Files Files Community

Added autoplay audio and fixed responsiveness issue

by Rifky - opened Jun 17, 2023

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

+208

-25

Files changed (1) hide show

app.py +208 -25

app.py CHANGED Viewed

@@ -3,13 +3,76 @@ import base64
 import requests
 import secrets
 import os
 from io import BytesIO
 from pydub import AudioSegment
 def create_chat_session():
-    r = requests.post("http://121.176.153.117:5000/create")
     if (r.status_code != 201):
         raise Exception("Failed to create chat session")
@@ -20,9 +83,19 @@ def create_chat_session():
     return session_id
-session_id = create_chat_session()
-chat_history = []
 def add_text(history, text):
     history = history + [(text, None)]
@@ -43,7 +116,7 @@ def add_audio(history, audio):
     history = history + [((f"temp_audio/{session_id}/audio_input_{audio_id}.mp3",), None)]
     response = requests.post(
-        "http://121.176.153.117:5000/transcribe",
         files={'audio': audio_file.getvalue()}
     )
@@ -56,6 +129,21 @@ def add_audio(history, audio):
     return history, gr.update(value="", interactive=False)
 def bot(history):
     if type(history[-1][0]) == str:
@@ -64,7 +152,7 @@ def bot(history):
         message = history[-2][0]
     response = requests.post(
-        f"http://121.176.153.117:5000/send/text/{session_id}",
         headers={'Content-type': 'application/json'},
         json={
             'message': message,
@@ -93,37 +181,132 @@ def bot(history):
     return history
 def load_chat_history(history):
     global chat_history
     if len(chat_history) > len(history):
         history = chat_history
     return history
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
-    demo.load(load_chat_history, [chatbot], [chatbot], queue=False)
-    with gr.Row():
-        with gr.Column(scale=0.85):
-            txt = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press enter, or record audio",
             ).style(container=False)
-        with gr.Column(scale=0.15, min_width=0):
-            audio = gr.Audio(
-                source="microphone", type="numpy", show_label=False, format="mp3"
             ).style(container=False)
-    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
-        bot, chatbot, chatbot
-    )
-    txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
-    audio_msg = audio.change(add_audio, [chatbot, audio], [chatbot, audio], queue=False, preprocess=False, postprocess=False).then(
-        bot, chatbot, chatbot
-    )
-    audio_msg.then(lambda: gr.update(interactive=True, value=None), None, [audio], queue=False)
-demo.launch(show_error=True)

 import requests
 import secrets
 import os
+import argparse
 from io import BytesIO
 from pydub import AudioSegment
+# Base URLs of the chat backend. API_ENDPOINT is the one the request helpers
+# use; it defaults to the public server and is switched to the local one by
+# the __main__ block when --local is passed.
+LOCAL_API_ENDPOINT = "http://localhost:5000"
+PUBLIC_API_ENDPOINT = "http://121.176.153.117:5000"
+API_ENDPOINT = PUBLIC_API_ENDPOINT
+# Module-wide state shared by the handlers through `global`: the id of the
+# active backend session and the most recently stored chat transcript.
+session_id = ""
+chat_history = []
+# Custom stylesheet passed to gr.Blocks: restyles the #audio_input widget
+# (button text hidden, microphone emoji shown instead) and adjusts the input
+# row for screens up to 480px wide.
+# NOTE(review): `flex-grow` takes a unitless number, so the two percentage
+# declarations in the media query are ignored by browsers. They are left
+# untouched here; confirm the intended ratio before changing them.
+css = """
+#audio_input {
+    margin-top: -30px !important;
+    margin-left: -15px !important;
+    width: 100% !important;
+}
+#audio_input button {
+    height:50px !important;
+    font-size: 0px !important;
+    width: 110% !important;
+}
+#audio_input button:after {
+    content: '🎤' !important;
+    font-size: 16px !important;
+}
+audio {
+    min-width: 200px !important;
+}
+@media (max-width : 480px) {
+    #audio_input {
+        width: 120% !important;
+    }
+    #audio_input button:after {
+        content: '' !important;
+    }
+    #txt_input_container {
+        flex-grow: 70% !important;
+    }
+    #audio_input_container {
+        flex-grow: 30% !important;
+    }
+}
+"""
+# JavaScript executed in the browser (via the `_js` event hook) after the bot
+# step: marks the newest <audio> element on the page as autoplay.
+js_audio_auto_play = """
+() => {
+    // select last audio element
+    const audio = document.getElementsByTagName('audio');
+    if (audio.length === 0) {
+        // no audio on the page: indexing would yield undefined and
+        // setAttribute would throw
+        return;
+    }
+    const last_audio = audio[audio.length - 1];
+    // set autoplay attribute
+    last_audio.setAttribute('autoplay', true);
+}
+"""
 def create_chat_session():
+    r = requests.post(API_ENDPOINT + "/create")
     if (r.status_code != 201):
         raise Exception("Failed to create chat session")
     return session_id
+def create_new_or_change_session(history, id):
+    """Start a fresh session when `id` is empty, otherwise switch to session `id`.
+
+    Updates the module-level `session_id` / `chat_history` and returns the
+    chatbot history to display plus an update that clears and disables the
+    session textbox.
+    """
+    global session_id, chat_history
+    if id != "":
+        # existing session: change_session does the loading; its own textbox
+        # update is discarded because an equivalent one is built below
+        restored, _ignored_update = change_session(history, id)
+    else:
+        session_id = create_chat_session()
+        restored = []
+    chat_history = restored
+    textbox_reset = gr.update(value="", interactive=False)
+    return restored, textbox_reset
 def add_text(history, text):
     history = history + [(text, None)]
     history = history + [((f"temp_audio/{session_id}/audio_input_{audio_id}.mp3",), None)]
     response = requests.post(
+        API_ENDPOINT + "/transcribe",
         files={'audio': audio_file.getvalue()}
     )
     return history, gr.update(value="", interactive=False)
+def reset_chat_session(history):
+    """Ask the backend to reset the active session and clear the transcript.
+
+    Raises:
+        Exception: carrying the response body when the backend does not
+            answer with HTTP 200.
+    """
+    global session_id, chat_history
+    reset_url = API_ENDPOINT + f"/reset/{session_id}"
+    reply = requests.post(reset_url)
+    if reply.status_code != 200:
+        raise Exception(reply.text)
+    # the stored transcript and the returned (displayed) one are separate lists
+    chat_history = []
+    return []
 def bot(history):
     if type(history[-1][0]) == str:
         message = history[-2][0]
     response = requests.post(
+        API_ENDPOINT + f"/send/text/{session_id}",
         headers={'Content-type': 'application/json'},
         json={
             'message': message,
     return history
+def _export_session_audio(session, encoded_audio):
+    """Decode base64 audio, write it as an mp3 under temp_audio/<session>/ and return the path."""
+    audio_file = BytesIO(base64.b64decode(encoded_audio.encode('utf-8')))
+    audio_path = f"temp_audio/{session}/audio_input_{secrets.token_hex(8)}.mp3"
+    AudioSegment.from_file(audio_file).export(audio_path, format="mp3")
+    return audio_path
+def change_session(history, id):
+    """Switch to the existing backend session `id` and rebuild its transcript.
+
+    Fetches the stored messages for `id`, writes every attached audio clip to
+    temp_audio/<id>/ and converts the messages into chatbot (user, bot) pairs.
+    The module-level `session_id` and `chat_history` are only updated once the
+    whole transcript has been rebuilt, so a failure leaves the previous
+    session active.
+
+    Args:
+        history: current chatbot history (replaced, not extended).
+        id: session id to switch to.
+
+    Returns:
+        The rebuilt history and an update that clears and disables the
+        session textbox.
+
+    Raises:
+        Exception: with the response body when the backend does not answer
+            with HTTP 200, or with the decoded payload when it cannot be
+            converted (the underlying error is chained as the cause).
+    """
+    global session_id
+    global chat_history
+    response = requests.get(
+        API_ENDPOINT + f"/{id}"
+    )
+    if (response.status_code != 200):
+        raise Exception(response.text)
+    response = response.json()
+    # nothing else in this function creates the per-session folder, and the
+    # mp3 export fails when it is missing
+    os.makedirs(f"temp_audio/{id}", exist_ok=True)
+    history = []
+    try:
+        for chat in response:
+            role = chat['role']
+            if role == 'user':
+                # user audio is optional; the text message is always shown
+                if chat['audio'] != "":
+                    history.append(((_export_session_audio(id, chat['audio']),), None))
+                history.append((chat['message'], None))
+            elif role == 'assistant':
+                history.append((None, (_export_session_audio(id, chat['audio']),)))
+                history.append((None, chat['message']))
+            else:
+                raise Exception("Invalid chat role")
+    except Exception as e:
+        # keep the original error reachable as the explicit cause
+        raise Exception(f"Response: {response}") from e
+    session_id = id
+    chat_history = history.copy()
+    print(f"len(chat_history): {len(chat_history)}\nlen(history): {len(history)}\nlen(response): {len(response)}")
+    return history, gr.update(value="", interactive=False)
 def load_chat_history(history):
     """Return the module-level chat_history when it is longer than `history`, else `history`."""
     # only a strictly longer stored transcript replaces what is displayed
     return chat_history if len(chat_history) > len(history) else history
+def main():
+    """Create a backend chat session, build the Gradio Blocks UI, wire its events and return it."""
+    global session_id
+    global chat_history
+    # every start begins with a new backend session and an empty transcript
+    session_id = create_chat_session()
+    chat_history = []
+    with gr.Blocks(css=css) as demo:
+        with gr.Row():
+            # change session id
+            change_session_txt = gr.Textbox(
                 show_label=False,
+                placeholder=session_id,
             ).style(container=False)
+        with gr.Row():
+            # button to create new or change session id
+            # NOTE(review): `type=` looks like it was meant to be Button's
+            # `variant=` — confirm against the installed Gradio version
+            change_session_button = gr.Button(
+                "Create new or change session", type='success', size="sm"
             ).style(container=False)
+        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
+        # on page load, show the stored transcript when it is longer than
+        # what the chatbot currently holds
+        demo.load(load_chat_history, [chatbot], [chatbot], queue=False)
+        with gr.Row():
+            with gr.Column(scale=0.85, min_width=0, elem_id="txt_input_container"):
+                txt = gr.Textbox(
+                    show_label=False,
+                    placeholder="Enter text and press enter, or record audio",
+                    elem_id="txt_input"
+                ).style(container=False)
+            with gr.Column(scale=0.15, min_width=0, elem_id="audio_input_container"):
+                audio = gr.Audio(
+                    source="microphone", type="numpy", show_label=False, format="mp3", min_width=0, container=False, elem_id="audio_input"
+                )
+        with gr.Row():
+            # NOTE(review): same `type=` vs `variant=` question as above
+            reset_button = gr.Button(
+                "Reset Chat Session", type='stop', size="sm"
+            ).style(container=False)
+        # text flow: add_text, then bot, then the autoplay JS in the browser
+        txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+            bot, chatbot, chatbot
+        ).then(
+            None, [], [], queue=False, _js=js_audio_auto_play
+        )
+        # make the textbox interactive again once the chain has finished
+        txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
+        # audio flow: same chain, triggered when the recorder's value changes
+        audio_msg = audio.change(add_audio, [chatbot, audio], [chatbot, audio], queue=False, preprocess=False, postprocess=False).then(
+            bot, chatbot, chatbot
+        ).then(
+            None, [], [], queue=False, _js=js_audio_auto_play
+        )
+        # clear the recorder and make it interactive again
+        audio_msg.then(lambda: gr.update(interactive=True, value=None), None, [audio], queue=False)
+        reset_button.click(reset_chat_session, [chatbot], [chatbot], queue=False)
+        # submitting an id in the session textbox switches to that session;
+        # afterwards the textbox is re-enabled with the active id as placeholder
+        chgn_msg = change_session_txt.submit(change_session, [chatbot, change_session_txt], [chatbot, change_session_txt], queue=False)
+        chgn_msg.then(lambda: gr.update(interactive=True, placeholder=session_id), None, [change_session_txt], queue=False)
+        # the button does the same, but an empty textbox creates a new session
+        create_new_or_change_session_btn = change_session_button.click(create_new_or_change_session, [chatbot, change_session_txt], [chatbot, change_session_txt], queue=False)
+        create_new_or_change_session_btn.then(lambda: gr.update(interactive=True, placeholder=session_id), None, [change_session_txt], queue=False)
+        return demo
+if __name__ == "__main__":
+    # command line: a single optional --local switch
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--local", action="store_true", help="Use local API endpoint")
+    use_local = cli.parse_args().local
+    # the endpoint must be chosen before main() creates the first session
+    if use_local:
+        API_ENDPOINT = LOCAL_API_ENDPOINT
+    demo = main()
+    demo.launch(show_error=True, server_name="0.0.0.0")