IliaLarchenko committed
Commit bbb0e13 · Parent(s): 939b9ab

Improved audio input waiting, refactored the rest

Files changed (1):
  1. ui/coding.py +65 -42
ui/coding.py CHANGED
@@ -1,17 +1,20 @@
 import gradio as gr
 import numpy as np
 import os
-
-from itertools import chain
 import time
+from itertools import chain
+from typing import List, Dict, Generator, Optional, Tuple, Any
+from functools import partial
 
 from resources.data import fixed_messages, topic_lists, interview_types
 from utils.ui import add_candidate_message, add_interviewer_message
-from typing import List, Dict, Generator, Optional, Tuple
-from functools import partial
 from api.llm import LLMManager
 from api.audio import TTSManager, STTManager
 
+DEMO_MESSAGE: str = """<span style="color: red;">
+This service is running in demo mode with limited performance (e.g. slow voice recognition). For a better experience, run the service locally, refer to the Instruction tab for more details.
+</span>"""
+
 
 def send_request(
     code: str,
@@ -23,9 +26,23 @@ def send_request(
     silent: Optional[bool] = False,
 ) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str, bytes], None, None]:
     """
-    Send a request to the LLM and update the chat display and translate it to speech.
+    Send a request to the LLM and process the response.
+
+    Args:
+        code (str): Current code.
+        previous_code (str): Previous code.
+        chat_history (List[Dict[str, str]]): Current chat history.
+        chat_display (List[List[Optional[str]]]): Current chat display.
+        llm (LLMManager): LLM manager instance.
+        tts (Optional[TTSManager]): TTS manager instance.
+        silent (Optional[bool]): Whether to silence audio output. Defaults to False.
+
+    Yields:
+        Tuple[List[Dict[str, str]], List[List[Optional[str]]], str, bytes]: Updated chat history, chat display, code, and audio chunk.
     """
+
     # TODO: Find the way to simplify it and remove duplication in logic
+
     if silent is None:
         silent = os.getenv("SILENT", False)
 
@@ -93,7 +110,16 @@ def send_request(
     yield chat_history, chat_display, code, b""
 
 
-def change_code_area(interview_type):
+def change_code_area(interview_type: str) -> gr.update:
+    """
+    Update the code area based on the interview type.
+
+    Args:
+        interview_type (str): Type of interview.
+
+    Returns:
+        gr.update: Gradio update object for the code area.
+    """
     if interview_type == "coding":
         return gr.update(
             label="Please write your code here. You can use any language, but only Python syntax highlighting is available.",
@@ -111,12 +137,22 @@ def change_code_area(interview_type):
         )
 
 
-DEMO_MESSAGE = """<span style="color: red;">
-This service is running in demo mode with limited performance (e.g. slow voice recognition). For a better experience, run the service locally, refer to the Instruction tab for more details.
-</span>"""
+def get_problem_solving_ui(
+    llm: LLMManager, tts: TTSManager, stt: STTManager, default_audio_params: Dict[str, Any], audio_output: gr.Audio
+) -> gr.Tab:
+    """
+    Create the problem-solving UI for the interview application.
 
+    Args:
+        llm (LLMManager): LLM manager instance.
+        tts (TTSManager): TTS manager instance.
+        stt (STTManager): STT manager instance.
+        default_audio_params (Dict[str, Any]): Default audio parameters.
+        audio_output (gr.Audio): Gradio audio output component.
 
-def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, default_audio_params: Dict, audio_output):
+    Returns:
+        gr.Tab: Gradio tab containing the problem-solving UI.
+    """
     send_request_partial = partial(send_request, llm=llm, tts=tts)
 
     with gr.Tab("Interview", render=False, elem_id=f"tab") as problem_tab:
@@ -127,6 +163,8 @@ def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, de
         hi_markdown = gr.Markdown(
             "<h2 style='text-align: center;'> Hi! I'm here to guide you through a practice session for your technical interview. Choose the interview settings to begin.</h2>\n"
         )
+
+        # UI components for interview settings
         with gr.Row() as init_acc:
             with gr.Column(scale=3):
                 interview_type_select = gr.Dropdown(
@@ -183,6 +221,7 @@ def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, de
             )
             start_btn = gr.Button("Generate a problem", elem_id=f"start_btn", interactive=not os.getenv("IS_DEMO", False))
 
+        # Problem statement and solution components
         with gr.Accordion("Problem statement", open=True, visible=False) as problem_acc:
             description = gr.Markdown(elem_id=f"problem_description", line_breaks=True)
         with gr.Accordion("Solution", open=True, visible=False) as solution_acc:
@@ -205,7 +244,7 @@ def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, de
         with gr.Accordion("Feedback", open=True, visible=False) as feedback_acc:
             feedback = gr.Markdown(elem_id=f"feedback", line_breaks=True)
 
-        # Start button click action chain
+        # Event handlers
         start_btn.click(fn=add_interviewer_message(fixed_messages["start"]), inputs=[chat], outputs=[chat]).success(
             fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
         ).success(
@@ -251,53 +290,37 @@ def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, de
             fn=llm.end_interview, inputs=[description, chat_history, interview_type_select], outputs=[feedback]
         )
 
-        is_recording = gr.State(False)
-        audio_input.start_recording(fn=lambda: True, outputs=[is_recording])
-
         hidden_text = gr.State("")
         is_transcribing = gr.State(False)
+
         audio_input.stream(
             stt.process_audio_chunk,
             inputs=[audio_input, audio_buffer],
             outputs=[audio_buffer, audio_to_transcribe],
             show_progress="hidden",
         ).success(fn=lambda: True, outputs=[is_transcribing]).success(
-            fn=stt.transcribe_audio, inputs=[audio_to_transcribe, hidden_text], outputs=[hidden_text], show_progress="full"
+            fn=stt.transcribe_audio, inputs=[audio_to_transcribe, hidden_text], outputs=[hidden_text], show_progress="hidden"
         ).success(
-            fn=stt.add_to_chat, inputs=[hidden_text, chat, is_recording], outputs=[chat], show_progress="full"
+            fn=stt.add_to_chat, inputs=[hidden_text, chat], outputs=[chat], show_progress="hidden"
        ).success(
            fn=lambda: False, outputs=[is_transcribing]
        )
 
-        # Ugly but works, need to clean up the code and find a better way to handle the logic
-        # Main problem - we need to wait until the last chunk of audio is transcribed before sending the request
-        # The same time I don't want to have a fixed delay by default
-        # I didn't find a native way of gradio to handle this, so I used a workaround
-        # There should be a better way to handle this, but I didn't find it yet
-        # The solution below keeps waiting 0.5 second up to 8 times until the audio is transcribed
-        audio_input.stop_recording(fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]).success(
-            fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]
-        ).success(fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]).success(
-            fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]
-        ).success(
-            fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]
-        ).success(
-            fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]
-        ).success(
-            fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]
-        ).success(
-            fn=lambda x: time.sleep(int(x) / 2), inputs=[is_transcribing]
-        ).success(
-            fn=lambda: False, outputs=[is_recording]
-        ).success(
+        # We need to wait until the last chunk of audio is transcribed before sending the request
+        # I didn't find a native way of gradio to handle this, and used a workaround
+        WAIT_TIME = 5
+        TIME_STEP = 0.1
+        STEPS = int(WAIT_TIME / TIME_STEP)
+
+        stop_audio_recording = audio_input.stop_recording(fn=lambda x: time.sleep(TIME_STEP) if x else None, inputs=[is_transcribing])
+        for _ in range(STEPS - 1):
+            stop_audio_recording = stop_audio_recording.success(fn=lambda x: time.sleep(TIME_STEP) if x else None, inputs=[is_transcribing])
+
+        stop_audio_recording.success(
            fn=send_request_partial,
            inputs=[code, previous_code, chat_history, chat],
            outputs=[chat_history, chat, previous_code, audio_output],
-        ).success(
-            fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
-        ).success(
-            fn=lambda: "", outputs=[hidden_text]
-        )
+        ).then(fn=lambda: (np.array([], dtype=np.int16), "", False), outputs=[audio_buffer, hidden_text, is_transcribing])
 
        interview_type_select.change(
            fn=lambda x: gr.update(choices=topic_lists[x], value=np.random.choice(topic_lists[x])),
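
Note: the waiting pattern introduced by this commit can be reproduced in isolation. The sketch below is a minimal, self-contained Gradio app, not the project's code: a hypothetical is_busy flag stands in for is_transcribing, and a button click stands in for audio_input.stop_recording. Each link in the chain of .success() steps sleeps TIME_STEP only while the flag is set, so the follow-up action fires as soon as the work finishes, with WAIT_TIME as an upper bound rather than a fixed delay.

    import time

    import gradio as gr

    WAIT_TIME = 5    # maximum total wait, in seconds
    TIME_STEP = 0.1  # duration of one polling step
    STEPS = int(WAIT_TIME / TIME_STEP)

    with gr.Blocks() as demo:
        # Hypothetical stand-ins: is_busy plays the role of is_transcribing,
        # and the button click plays the role of audio_input.stop_recording.
        is_busy = gr.State(True)
        result = gr.Textbox(label="Result")
        send = gr.Button("Send")

        # Each link sleeps one step only if the flag is still set; once the
        # flag is cleared, the remaining links are cheap no-ops.
        event = send.click(fn=lambda busy: time.sleep(TIME_STEP) if busy else None, inputs=[is_busy])
        for _ in range(STEPS - 1):
            event = event.success(fn=lambda busy: time.sleep(TIME_STEP) if busy else None, inputs=[is_busy])

        # Runs after the chain drains, i.e. at most WAIT_TIME seconds later.
        event.success(fn=lambda: "done", outputs=[result])

    demo.launch()

Compared with the previous eight fixed 0.5-second sleeps, the shorter step both reduces the latency after transcription completes and raises the maximum wait from 4 to 5 seconds.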