Add autoplay audio and fix responsiveness issue

#4
by Rifky - opened
Files changed (1) hide show
  1. app.py +208 -25
app.py CHANGED
@@ -3,13 +3,76 @@ import base64
3
  import requests
4
  import secrets
5
  import os
 
6
 
7
  from io import BytesIO
8
  from pydub import AudioSegment
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def create_chat_session():
12
- r = requests.post("http://121.176.153.117:5000/create")
13
 
14
  if (r.status_code != 201):
15
  raise Exception("Failed to create chat session")
@@ -20,9 +83,19 @@ def create_chat_session():
20
 
21
  return session_id
22
 
23
- session_id = create_chat_session()
24
- chat_history = []
 
 
 
 
 
 
 
 
 
25
 
 
26
 
27
  def add_text(history, text):
28
  history = history + [(text, None)]
@@ -43,7 +116,7 @@ def add_audio(history, audio):
43
  history = history + [((f"temp_audio/{session_id}/audio_input_{audio_id}.mp3",), None)]
44
 
45
  response = requests.post(
46
- "http://121.176.153.117:5000/transcribe",
47
  files={'audio': audio_file.getvalue()}
48
  )
49
 
@@ -56,6 +129,21 @@ def add_audio(history, audio):
56
 
57
  return history, gr.update(value="", interactive=False)
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def bot(history):
61
  if type(history[-1][0]) == str:
@@ -64,7 +152,7 @@ def bot(history):
64
  message = history[-2][0]
65
 
66
  response = requests.post(
67
- f"http://121.176.153.117:5000/send/text/{session_id}",
68
  headers={'Content-type': 'application/json'},
69
  json={
70
  'message': message,
@@ -93,37 +181,132 @@ def bot(history):
93
 
94
  return history
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def load_chat_history(history):
97
  global chat_history
98
  if len(chat_history) > len(history):
99
  history = chat_history
100
  return history
101
 
 
 
 
102
 
103
- with gr.Blocks() as demo:
104
- chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
105
-
106
- demo.load(load_chat_history, [chatbot], [chatbot], queue=False)
107
 
108
- with gr.Row():
109
- with gr.Column(scale=0.85):
110
- txt = gr.Textbox(
 
111
  show_label=False,
112
- placeholder="Enter text and press enter, or record audio",
113
  ).style(container=False)
114
- with gr.Column(scale=0.15, min_width=0):
115
- audio = gr.Audio(
116
- source="microphone", type="numpy", show_label=False, format="mp3"
 
117
  ).style(container=False)
118
 
119
- txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
120
- bot, chatbot, chatbot
121
- )
122
- txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- audio_msg = audio.change(add_audio, [chatbot, audio], [chatbot, audio], queue=False, preprocess=False, postprocess=False).then(
125
- bot, chatbot, chatbot
126
- )
127
- audio_msg.then(lambda: gr.update(interactive=True, value=None), None, [audio], queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- demo.launch(show_error=True)
 
 
3
  import requests
4
  import secrets
5
  import os
6
+ import argparse
7
 
8
  from io import BytesIO
9
  from pydub import AudioSegment
10
 
11
 
12
# Base URLs of the chat backend. API_ENDPOINT is the one every request uses;
# it defaults to the public server and is switched to the local one by the
# --local command-line flag.
LOCAL_API_ENDPOINT = "http://localhost:5000"
PUBLIC_API_ENDPOINT = "http://121.176.153.117:5000"

API_ENDPOINT = PUBLIC_API_ENDPOINT

# Process-wide state shared by all handlers: the id of the active backend
# session and the last chat history known for it.
session_id = ""
chat_history = []

# Custom stylesheet handed to gr.Blocks(css=...).
# - "!important" must sit inside the declaration, before the terminating ";";
#   written as "value; !important;" the browser drops the stray token and the
#   rule loses its priority.
# - flex-grow only accepts a unitless number, so the intended 70/30 split on
#   narrow screens is expressed as the ratio 7:3 rather than percentages.
css = """
#audio_input {
    margin-top: -30px !important;
    margin-left: -15px !important;
    width: 100% !important;
}

#audio_input button {
    height:50px !important;
    font-size: 0px !important;
    width: 110% !important;
}

#audio_input button:after {
    content: '🎤' !important;
    font-size: 16px !important;
}

audio {
    min-width: 200px !important;
}

@media (max-width : 480px) {
    #audio_input {
        width: 120% !important;
    }

    #audio_input button:after {
        content: '' !important;
    }

    #txt_input_container {
        flex-grow: 7 !important;
    }

    #audio_input_container {
        flex-grow: 3 !important;
    }
}


"""
62
+
63
# Client-side snippet run (via the `_js` event argument) after the bot reply
# has been rendered: it marks the most recent <audio> element as autoplay.
# The guard matters because this step also runs when no audio element exists
# on the page; indexing an empty collection would otherwise throw a TypeError
# in the browser.
js_audio_auto_play = """
() => {
    // select last audio element
    const audio = document.getElementsByTagName('audio');
    if (audio.length === 0) {
        // nothing to play yet
        return;
    }
    const last_audio = audio[audio.length - 1];

    // set autoplay attribute
    last_audio.setAttribute('autoplay', true);
}
"""
73
+
74
  def create_chat_session():
75
+ r = requests.post(API_ENDPOINT + "/create")
76
 
77
  if (r.status_code != 201):
78
  raise Exception("Failed to create chat session")
 
83
 
84
  return session_id
85
 
86
def create_new_or_change_session(history, id):
    """Start a fresh backend session or switch to an existing one.

    Args:
        history: Current chatbot history; only forwarded to change_session.
        id: Contents of the session-id textbox. An empty string means
            "create a new session"; anything else is treated as the id of
            the session to load.

    Returns:
        A pair of (history to display, update that clears and disables the
        session-id textbox).
    """
    global session_id, chat_history

    if id != "":
        # Existing session requested: change_session rebuilds its history.
        history, _ = change_session(history, id)
    else:
        # No id given: open a brand-new session with an empty conversation.
        session_id = create_chat_session()
        history = []

    chat_history = history

    textbox_update = gr.update(value="", interactive=False)
    return history, textbox_update
99
 
100
  def add_text(history, text):
101
  history = history + [(text, None)]
 
116
  history = history + [((f"temp_audio/{session_id}/audio_input_{audio_id}.mp3",), None)]
117
 
118
  response = requests.post(
119
+ API_ENDPOINT + "/transcribe",
120
  files={'audio': audio_file.getvalue()}
121
  )
122
 
 
129
 
130
  return history, gr.update(value="", interactive=False)
131
 
132
def reset_chat_session(history):
    """Ask the backend to reset the active session and clear the chat.

    Args:
        history: Current chatbot history. Its contents are not used; the
            function always hands back an empty list.

    Returns:
        A new empty history list.

    Raises:
        Exception: With the response body as message when the backend does
            not answer with HTTP 200.
    """
    global session_id
    global chat_history

    reset_url = API_ENDPOINT + f"/reset/{session_id}"
    response = requests.post(reset_url)

    if not response.status_code == 200:
        raise Exception(response.text)

    # Clear the cached copy and return a separate empty list for the UI.
    chat_history = []
    history = []
    return history
147
 
148
  def bot(history):
149
  if type(history[-1][0]) == str:
 
152
  message = history[-2][0]
153
 
154
  response = requests.post(
155
+ API_ENDPOINT + f"/send/text/{session_id}",
156
  headers={'Content-type': 'application/json'},
157
  json={
158
  'message': message,
 
181
 
182
  return history
183
 
184
def change_session(history, id):
    """Switch to an existing backend session and rebuild its chat history.

    The session's messages are fetched from the backend and converted into
    chatbot rows: user entries go in the first tuple slot, assistant entries
    in the second. A message that carries audio produces an extra row holding
    the path of the exported mp3 file, placed before its text row.

    Args:
        history: Current chatbot history. Its contents are not used; the
            history is rebuilt from the backend response.
        id: Identifier of the session to load.
            NOTE(review): this value is user-supplied and is interpolated
            into both the request URL and a filesystem path — confirm it is
            validated somewhere before reaching this function.

    Returns:
        A pair of (rebuilt history, update that clears and disables the
        session-id textbox).

    Raises:
        Exception: With the response body as message when the backend does
            not answer with HTTP 200, or with the decoded response as message
            (chained to the original error) when a message cannot be
            converted.
    """
    global session_id
    global chat_history

    response = requests.get(
        API_ENDPOINT + f"/{id}"
    )

    if response.status_code != 200:
        raise Exception(response.text)

    response = response.json()

    rebuilt = []

    try:
        for chat in response:
            role = chat['role']
            if role == 'user':
                if chat['audio'] != "":
                    rebuilt.append(((_export_session_audio(id, chat['audio']),), None))
                rebuilt.append((chat['message'], None))
            elif role == 'assistant':
                # Same empty-audio guard as the user branch: decoding an
                # empty payload would fail inside the audio loader.
                if chat['audio'] != "":
                    rebuilt.append((None, (_export_session_audio(id, chat['audio']),)))
                rebuilt.append((None, chat['message']))
            else:
                raise Exception("Invalid chat role")
    except Exception as e:
        # Keep the original failure attached so the traceback shows the cause.
        raise Exception(f"Response: {response}") from e

    # Commit the switch only once the whole history was rebuilt, so a failed
    # load leaves the previously active session untouched.
    session_id = id
    chat_history = rebuilt.copy()

    print(f"len(chat_history): {len(chat_history)}\nlen(history): {len(rebuilt)}\nlen(response): {len(response)}")

    return rebuilt, gr.update(value="", interactive=False)


def _export_session_audio(session, encoded_audio):
    """Decode base64 audio, save it as mp3 under temp_audio/<session>/ and return the path."""
    # The directory may not exist yet when the session was not created by
    # this process.
    os.makedirs(f"temp_audio/{session}", exist_ok=True)

    audio_id = secrets.token_hex(8)
    audio_path = f"temp_audio/{session}/audio_input_{audio_id}.mp3"

    audio_file = BytesIO(base64.b64decode(encoded_audio.encode('utf-8')))
    AudioSegment.from_file(audio_file).export(audio_path, format="mp3")

    return audio_path
229
+
230
  def load_chat_history(history):
231
  global chat_history
232
  if len(chat_history) > len(history):
233
  history = chat_history
234
  return history
235
 
236
def main():
    """Create a backend chat session and build the Gradio interface.

    Components are created in the same order as before, because creation
    order inside the layout contexts determines where they are rendered.

    Returns:
        The assembled gr.Blocks application, not yet launched.
    """
    global session_id, chat_history

    session_id = create_chat_session()
    chat_history = []

    def unlock_text_box():
        # Re-enable the message box once the reply has been handled.
        return gr.update(interactive=True)

    def unlock_audio_input():
        # Re-enable the recorder and drop the clip that was just sent.
        return gr.update(interactive=True, value=None)

    def unlock_session_box():
        # session_id is read when the handler runs, so the placeholder shows
        # whichever session is active at that moment.
        return gr.update(interactive=True, placeholder=session_id)

    with gr.Blocks(css=css) as demo:
        with gr.Row():
            # change session id
            change_session_txt = gr.Textbox(
                show_label=False,
                placeholder=session_id,
            ).style(container=False)
        with gr.Row():
            # button to create new or change session id
            # NOTE(review): `type=` is passed through unchanged; confirm the
            # installed Gradio version does not expect `variant=` here.
            change_session_button = gr.Button(
                "Create new or change session", type='success', size="sm"
            ).style(container=False)

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)

        # Show the cached conversation when the page is (re)loaded.
        demo.load(load_chat_history, [chatbot], [chatbot], queue=False)

        with gr.Row():
            with gr.Column(scale=0.85, min_width=0, elem_id="txt_input_container"):
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter, or record audio",
                    elem_id="txt_input"
                ).style(container=False)
            with gr.Column(scale=0.15, min_width=0, elem_id="audio_input_container"):
                audio = gr.Audio(
                    source="microphone",
                    type="numpy",
                    show_label=False,
                    format="mp3",
                    min_width=0,
                    container=False,
                    elem_id="audio_input"
                )

        with gr.Row():
            reset_button = gr.Button(
                "Reset Chat Session", type='stop', size="sm"
            ).style(container=False)

        # Text message: add it to the chat, get the bot reply, then run the
        # client-side autoplay snippet, and finally unlock the text box.
        text_submitted = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False)
        text_answered = text_submitted.then(bot, chatbot, chatbot)
        txt_msg = text_answered.then(None, [], [], queue=False, _js=js_audio_auto_play)
        txt_msg.then(unlock_text_box, None, [txt], queue=False)

        # Recorded audio follows the same pipeline as text.
        audio_recorded = audio.change(
            add_audio,
            [chatbot, audio],
            [chatbot, audio],
            queue=False,
            preprocess=False,
            postprocess=False
        )
        audio_answered = audio_recorded.then(bot, chatbot, chatbot)
        audio_msg = audio_answered.then(None, [], [], queue=False, _js=js_audio_auto_play)
        audio_msg.then(unlock_audio_input, None, [audio], queue=False)

        reset_button.click(reset_chat_session, [chatbot], [chatbot], queue=False)

        # Pressing enter in the session box switches to the typed session id.
        chgn_msg = change_session_txt.submit(
            change_session,
            [chatbot, change_session_txt],
            [chatbot, change_session_txt],
            queue=False
        )
        chgn_msg.then(unlock_session_box, None, [change_session_txt], queue=False)

        # The button creates a new session when the box is empty, otherwise
        # it switches to the typed id.
        create_new_or_change_session_btn = change_session_button.click(
            create_new_or_change_session,
            [chatbot, change_session_txt],
            [chatbot, change_session_txt],
            queue=False
        )
        create_new_or_change_session_btn.then(unlock_session_box, None, [change_session_txt], queue=False)

    return demo
300
+
301
+
302
if __name__ == "__main__":
    # Command line: the optional --local flag points the app at the backend
    # running on this machine instead of the public one.
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument(
        "--local",
        action="store_true",
        help="Use local API endpoint",
    )
    args = cli_parser.parse_args()

    if args.local:
        API_ENDPOINT = LOCAL_API_ENDPOINT

    # Build the UI, then serve it on all network interfaces.
    demo = main()
    demo.launch(show_error=True, server_name="0.0.0.0")