IliaLarchenko committed on
Commit
eee97a9
1 Parent(s): 1d5b387

Reduced tts delay

Browse files
Files changed (4) hide show
  1. api/audio.py +1 -5
  2. api/llm.py +9 -28
  3. resources/prompts.py +3 -1
  4. ui/coding.py +68 -5
api/audio.py CHANGED
@@ -230,8 +230,4 @@ class TTSManager:
230
  :return: Generator yielding chunks of audio bytes.
231
  """
232
  if len(chat_history) > 0 and chat_history[-1][1]:
233
- n = len(chat_history) - 1
234
- while n >= 0 and chat_history[n][1]:
235
- n -= 1
236
- for i in range(n + 1, len(chat_history)):
237
- yield from self.read_text(chat_history[i][1])
 
230
  :return: Generator yielding chunks of audio bytes.
231
  """
232
  if len(chat_history) > 0 and chat_history[-1][1]:
233
+ yield from self.read_text(chat_history[-1][1])
 
 
 
 
api/llm.py CHANGED
@@ -59,11 +59,9 @@ class LLMManager:
59
  response = self.client.chat.completions.create(
60
  model=self.config.llm.name, messages=messages, temperature=1, stream=True, max_tokens=2000
61
  )
62
- text = ""
63
  for chunk in response:
64
  if chunk.choices[0].delta.content:
65
- text += chunk.choices[0].delta.content
66
- yield text
67
  except Exception as e:
68
  raise APIError(f"LLM Get Text Error: Unexpected error: {e}")
69
 
@@ -109,7 +107,10 @@ class LLMManager:
109
  Get a problem from the LLM based on the given requirements, difficulty, and topic.
110
  """
111
  messages = self.get_problem_prepare_messages(requirements, difficulty, topic, interview_type)
112
- yield from self.get_text(messages)
 
 
 
113
 
114
  def update_chat_history(
115
  self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
@@ -123,29 +124,6 @@ class LLMManager:
123
  chat_history.append({"role": "user", "content": message})
124
  return chat_history
125
 
126
- def send_request(
127
- self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
128
- ) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str], None, None]:
129
- """
130
- Send a request to the LLM and update the chat display.
131
- """
132
- chat_history = self.update_chat_history(code, previous_code, chat_history, chat_display)
133
- original_len = len(chat_display)
134
- chat_history.append({"role": "assistant", "content": ""})
135
- reply = self.get_text(chat_history)
136
- for message in reply:
137
- chat_history[-1]["content"] = message
138
- text_to_display = message.split("#NOTES#")[0].strip()
139
- split_messages = text_to_display.split("\n\n")
140
- chat_display = chat_display[:original_len]
141
- for m in split_messages:
142
- if m.strip():
143
- chat_display.append([None, m])
144
- if len(chat_display) == original_len:
145
- chat_display.append([None, ""])
146
-
147
- yield chat_history, chat_display, code
148
-
149
  def end_interview_prepare_messages(
150
  self, problem_description: str, chat_history: List[Dict[str, str]], interview_type: str
151
  ) -> List[Dict[str, str]]:
@@ -171,4 +149,7 @@ class LLMManager:
171
  yield "No interview history available"
172
  return
173
  messages = self.end_interview_prepare_messages(problem_description, chat_history, interview_type)
174
- yield from self.get_text(messages)
 
 
 
 
59
  response = self.client.chat.completions.create(
60
  model=self.config.llm.name, messages=messages, temperature=1, stream=True, max_tokens=2000
61
  )
 
62
  for chunk in response:
63
  if chunk.choices[0].delta.content:
64
+ yield chunk.choices[0].delta.content
 
65
  except Exception as e:
66
  raise APIError(f"LLM Get Text Error: Unexpected error: {e}")
67
 
 
107
  Get a problem from the LLM based on the given requirements, difficulty, and topic.
108
  """
109
  messages = self.get_problem_prepare_messages(requirements, difficulty, topic, interview_type)
110
+ problem = ""
111
+ for text in self.get_text(messages):
112
+ problem += text
113
+ yield problem
114
 
115
  def update_chat_history(
116
  self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
 
124
  chat_history.append({"role": "user", "content": message})
125
  return chat_history
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def end_interview_prepare_messages(
128
  self, problem_description: str, chat_history: List[Dict[str, str]], interview_type: str
129
  ) -> List[Dict[str, str]]:
 
149
  yield "No interview history available"
150
  return
151
  messages = self.end_interview_prepare_messages(problem_description, chat_history, interview_type)
152
+ feedback = ""
153
+ for text in self.get_text(messages):
154
+ feedback += text
155
+ yield feedback
resources/prompts.py CHANGED
@@ -27,7 +27,9 @@ You are an AI conducting an interview. Your role is to manage the interview effe
27
  - Make notes when you encounter: mistakes, bugs, incorrect statements, missed important aspects, any other observations.
28
  - There should be no other delimiters in your response. Only #NOTES# is a valid delimiter, everything else will be treated just like text.
29
 
30
- - If you answer is long add double '\n\n' to split it in smaller logical parts, so it will be easier to read for the candidate.
 
 
31
 
32
  - You should direct the interview strictly rather than helping the candidate solve the problem.
33
  - Be very concise in your responses. Allow the candidate to lead the discussion, ensuring they speak more than you do.
 
27
  - Make notes when you encounter: mistakes, bugs, incorrect statements, missed important aspects, any other observations.
28
  - There should be no other delimiters in your response. Only #NOTES# is a valid delimiter, everything else will be treated just like text.
29
 
30
+ - Your visible messages will be read out loud to the candidate.
31
+ - Use mostly plain text; avoid markdown and complex formatting, and unless necessary avoid code and formulas in the visible messages.
32
+ - Use '\n\n' to split your message in short logical parts, so it will be easier to read for the candidate.
33
 
34
  - You should direct the interview strictly rather than helping the candidate solve the problem.
35
  - Be very concise in your responses. Allow the candidate to lead the discussion, ensuring they speak more than you do.
ui/coding.py CHANGED
@@ -2,8 +2,70 @@ import gradio as gr
2
  import numpy as np
3
  import os
4
 
 
 
5
  from resources.data import fixed_messages, topic_lists
6
  from utils.ui import add_candidate_message, add_interviewer_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def change_code_area(interview_type):
@@ -25,6 +87,8 @@ def change_code_area(interview_type):
25
 
26
 
27
  def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
 
 
28
  with gr.Tab("Interview", render=False, elem_id=f"tab") as problem_tab:
29
  chat_history = gr.State([])
30
  previous_code = gr.State("")
@@ -169,11 +233,11 @@ def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
169
  send_btn.click(fn=add_candidate_message, inputs=[message, chat], outputs=[chat]).success(
170
  fn=lambda: None, outputs=[message]
171
  ).success(
172
- fn=llm.send_request,
173
  inputs=[code, previous_code, chat_history, chat],
174
- outputs=[chat_history, chat, previous_code],
175
- ).success(
176
- fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
177
  ).success(
178
  fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
179
  ).success(
@@ -187,7 +251,6 @@ def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
187
  outputs=[transcript, audio_buffer, message],
188
  show_progress="hidden",
189
  )
190
- audio_input.stop_recording(fn=lambda: gr.update(interactive=True), outputs=[send_btn])
191
  else:
192
  audio_input.stop_recording(fn=stt.speech_to_text_full, inputs=[audio_input], outputs=[message]).success(
193
  fn=lambda: gr.update(interactive=True), outputs=[send_btn]
 
2
  import numpy as np
3
  import os
4
 
5
+ from itertools import chain
6
+
7
  from resources.data import fixed_messages, topic_lists
8
  from utils.ui import add_candidate_message, add_interviewer_message
9
+ from typing import List, Dict, Generator, Optional, Tuple
10
+ from functools import partial
11
+
12
+
13
+ def send_request(
14
+ code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]], llm, tts
15
+ ) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str, bytes], None, None]:
16
+ """
17
+ Send a request to the LLM, update the chat display, and convert the reply to speech.
18
+ """
19
+ # TODO: Find the way to simplify it and remove duplication in logic
20
+ chat_history = llm.update_chat_history(code, previous_code, chat_history, chat_display)
21
+ original_len = len(chat_display)
22
+ chat_display.append([None, ""])
23
+ chat_history.append({"role": "assistant", "content": ""})
24
+
25
+ text_chunks = []
26
+ reply = llm.get_text(chat_history)
27
+
28
+ audio_generator = iter(())
29
+ has_text_item = True
30
+ has_audion_item = True
31
+ audio_created = 0
32
+ is_notes = False
33
+
34
+ while has_text_item or has_audion_item:
35
+ try:
36
+ text_chunk = next(reply)
37
+ text_chunks.append(text_chunk)
38
+ has_text_item = True
39
+ except StopIteration:
40
+ has_text_item = False
41
+ chat_history[-1]["content"] = "".join(text_chunks)
42
+
43
+ try:
44
+ audio_chunk = next(audio_generator)
45
+ has_audion_item = True
46
+ except StopIteration:
47
+ audio_chunk = b""
48
+ has_audion_item = False
49
+
50
+ if has_text_item and not is_notes:
51
+ last_message = chat_display[-1][1]
52
+ last_message += text_chunk
53
+
54
+ split_notes = last_message.split("#NOTES#")
55
+ if len(split_notes) > 1:
56
+ is_notes = True
57
+ last_message = split_notes[0]
58
+ split_messages = last_message.split("\n\n")
59
+ chat_display[-1][1] = split_messages[0]
60
+ for m in split_messages[1:]:
61
+ chat_display.append([None, m])
62
+
63
+ if len(chat_display) - original_len > audio_created + has_text_item:
64
+ audio_generator = chain(audio_generator, tts.read_text(chat_display[original_len + audio_created][1]))
65
+ audio_created += 1
66
+ has_audion_item = True
67
+
68
+ yield chat_history, chat_display, code, audio_chunk
69
 
70
 
71
  def change_code_area(interview_type):
 
87
 
88
 
89
  def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
90
+ send_request_partial = partial(send_request, llm=llm, tts=tts)
91
+
92
  with gr.Tab("Interview", render=False, elem_id=f"tab") as problem_tab:
93
  chat_history = gr.State([])
94
  previous_code = gr.State("")
 
233
  send_btn.click(fn=add_candidate_message, inputs=[message, chat], outputs=[chat]).success(
234
  fn=lambda: None, outputs=[message]
235
  ).success(
236
+ fn=send_request_partial,
237
  inputs=[code, previous_code, chat_history, chat],
238
+ outputs=[chat_history, chat, previous_code, audio_output],
239
+ # ).success(
240
+ # fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
241
  ).success(
242
  fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
243
  ).success(
 
251
  outputs=[transcript, audio_buffer, message],
252
  show_progress="hidden",
253
  )
 
254
  else:
255
  audio_input.stop_recording(fn=stt.speech_to_text_full, inputs=[audio_input], outputs=[message]).success(
256
  fn=lambda: gr.update(interactive=True), outputs=[send_btn]