IliaLarchenko committed on
Commit
eee97a9
1 Parent(s): 1d5b387

Reduced tts delay

Browse files
Files changed (4) hide show
  1. api/audio.py +1 -5
  2. api/llm.py +9 -28
  3. resources/prompts.py +3 -1
  4. ui/coding.py +68 -5
api/audio.py CHANGED
@@ -230,8 +230,4 @@ class TTSManager:
230
  :return: Generator yielding chunks of audio bytes.
231
  """
232
  if len(chat_history) > 0 and chat_history[-1][1]:
233
- n = len(chat_history) - 1
234
- while n >= 0 and chat_history[n][1]:
235
- n -= 1
236
- for i in range(n + 1, len(chat_history)):
237
- yield from self.read_text(chat_history[i][1])
 
230
  :return: Generator yielding chunks of audio bytes.
231
  """
232
  if len(chat_history) > 0 and chat_history[-1][1]:
233
+ yield from self.read_text(chat_history[-1][1])
 
 
 
 
api/llm.py CHANGED
@@ -59,11 +59,9 @@ class LLMManager:
59
  response = self.client.chat.completions.create(
60
  model=self.config.llm.name, messages=messages, temperature=1, stream=True, max_tokens=2000
61
  )
62
- text = ""
63
  for chunk in response:
64
  if chunk.choices[0].delta.content:
65
- text += chunk.choices[0].delta.content
66
- yield text
67
  except Exception as e:
68
  raise APIError(f"LLM Get Text Error: Unexpected error: {e}")
69
 
@@ -109,7 +107,10 @@ class LLMManager:
109
  Get a problem from the LLM based on the given requirements, difficulty, and topic.
110
  """
111
  messages = self.get_problem_prepare_messages(requirements, difficulty, topic, interview_type)
112
- yield from self.get_text(messages)
 
 
 
113
 
114
  def update_chat_history(
115
  self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
@@ -123,29 +124,6 @@ class LLMManager:
123
  chat_history.append({"role": "user", "content": message})
124
  return chat_history
125
 
126
- def send_request(
127
- self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
128
- ) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str], None, None]:
129
- """
130
- Send a request to the LLM and update the chat display.
131
- """
132
- chat_history = self.update_chat_history(code, previous_code, chat_history, chat_display)
133
- original_len = len(chat_display)
134
- chat_history.append({"role": "assistant", "content": ""})
135
- reply = self.get_text(chat_history)
136
- for message in reply:
137
- chat_history[-1]["content"] = message
138
- text_to_display = message.split("#NOTES#")[0].strip()
139
- split_messages = text_to_display.split("\n\n")
140
- chat_display = chat_display[:original_len]
141
- for m in split_messages:
142
- if m.strip():
143
- chat_display.append([None, m])
144
- if len(chat_display) == original_len:
145
- chat_display.append([None, ""])
146
-
147
- yield chat_history, chat_display, code
148
-
149
  def end_interview_prepare_messages(
150
  self, problem_description: str, chat_history: List[Dict[str, str]], interview_type: str
151
  ) -> List[Dict[str, str]]:
@@ -171,4 +149,7 @@ class LLMManager:
171
  yield "No interview history available"
172
  return
173
  messages = self.end_interview_prepare_messages(problem_description, chat_history, interview_type)
174
- yield from self.get_text(messages)
 
 
 
 
59
  response = self.client.chat.completions.create(
60
  model=self.config.llm.name, messages=messages, temperature=1, stream=True, max_tokens=2000
61
  )
 
62
  for chunk in response:
63
  if chunk.choices[0].delta.content:
64
+ yield chunk.choices[0].delta.content
 
65
  except Exception as e:
66
  raise APIError(f"LLM Get Text Error: Unexpected error: {e}")
67
 
 
107
  Get a problem from the LLM based on the given requirements, difficulty, and topic.
108
  """
109
  messages = self.get_problem_prepare_messages(requirements, difficulty, topic, interview_type)
110
+ problem = ""
111
+ for text in self.get_text(messages):
112
+ problem += text
113
+ yield problem
114
 
115
  def update_chat_history(
116
  self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
 
124
  chat_history.append({"role": "user", "content": message})
125
  return chat_history
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def end_interview_prepare_messages(
128
  self, problem_description: str, chat_history: List[Dict[str, str]], interview_type: str
129
  ) -> List[Dict[str, str]]:
 
149
  yield "No interview history available"
150
  return
151
  messages = self.end_interview_prepare_messages(problem_description, chat_history, interview_type)
152
+ feedback = ""
153
+ for text in self.get_text(messages):
154
+ feedback += text
155
+ yield feedback
resources/prompts.py CHANGED
@@ -27,7 +27,9 @@ You are an AI conducting an interview. Your role is to manage the interview effe
27
  - Make notes when you encounter: mistakes, bugs, incorrect statements, missed important aspects, any other observations.
28
  - There should be no other delimiters in your response. Only #NOTES# is a valid delimiter, everything else will be treated just like text.
29
 
30
- - If you answer is long add double '\n\n' to split it in smaller logical parts, so it will be easier to read for the candidate.
 
 
31
 
32
  - You should direct the interview strictly rather than helping the candidate solve the problem.
33
  - Be very concise in your responses. Allow the candidate to lead the discussion, ensuring they speak more than you do.
 
27
  - Make notes when you encounter: mistakes, bugs, incorrect statements, missed important aspects, any other observations.
28
  - There should be no other delimiters in your response. Only #NOTES# is a valid delimiter, everything else will be treated just like text.
29
 
30
+ - Your visible messages will be read out loud to the candidate.
31
+ - Use mostly plain text; avoid markdown and complex formatting, and unless necessary avoid code and formulas in the visible messages.
32
+ - Use '\n\n' to split your message in short logical parts, so it will be easier to read for the candidate.
33
 
34
  - You should direct the interview strictly rather than helping the candidate solve the problem.
35
  - Be very concise in your responses. Allow the candidate to lead the discussion, ensuring they speak more than you do.
ui/coding.py CHANGED
@@ -2,8 +2,70 @@ import gradio as gr
2
  import numpy as np
3
  import os
4
 
 
 
5
  from resources.data import fixed_messages, topic_lists
6
  from utils.ui import add_candidate_message, add_interviewer_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def change_code_area(interview_type):
@@ -25,6 +87,8 @@ def change_code_area(interview_type):
25
 
26
 
27
  def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
 
 
28
  with gr.Tab("Interview", render=False, elem_id=f"tab") as problem_tab:
29
  chat_history = gr.State([])
30
  previous_code = gr.State("")
@@ -169,11 +233,11 @@ def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
169
  send_btn.click(fn=add_candidate_message, inputs=[message, chat], outputs=[chat]).success(
170
  fn=lambda: None, outputs=[message]
171
  ).success(
172
- fn=llm.send_request,
173
  inputs=[code, previous_code, chat_history, chat],
174
- outputs=[chat_history, chat, previous_code],
175
- ).success(
176
- fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
177
  ).success(
178
  fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
179
  ).success(
@@ -187,7 +251,6 @@ def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
187
  outputs=[transcript, audio_buffer, message],
188
  show_progress="hidden",
189
  )
190
- audio_input.stop_recording(fn=lambda: gr.update(interactive=True), outputs=[send_btn])
191
  else:
192
  audio_input.stop_recording(fn=stt.speech_to_text_full, inputs=[audio_input], outputs=[message]).success(
193
  fn=lambda: gr.update(interactive=True), outputs=[send_btn]
 
2
  import numpy as np
3
  import os
4
 
5
+ from itertools import chain
6
+
7
  from resources.data import fixed_messages, topic_lists
8
  from utils.ui import add_candidate_message, add_interviewer_message
9
+ from typing import List, Dict, Generator, Optional, Tuple
10
+ from functools import partial
11
+
12
+
13
+ def send_request(
14
+ code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]], llm, tts
15
+ ) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str, bytes], None, None]:
16
+ """
17
+ Send a request to the LLM, update the chat display, and convert the reply to speech.
18
+ """
19
+ # TODO: Find the way to simplify it and remove duplication in logic
20
+ chat_history = llm.update_chat_history(code, previous_code, chat_history, chat_display)
21
+ original_len = len(chat_display)
22
+ chat_display.append([None, ""])
23
+ chat_history.append({"role": "assistant", "content": ""})
24
+
25
+ text_chunks = []
26
+ reply = llm.get_text(chat_history)
27
+
28
+ audio_generator = iter(())
29
+ has_text_item = True
30
+ has_audion_item = True
31
+ audio_created = 0
32
+ is_notes = False
33
+
34
+ while has_text_item or has_audion_item:
35
+ try:
36
+ text_chunk = next(reply)
37
+ text_chunks.append(text_chunk)
38
+ has_text_item = True
39
+ except StopIteration:
40
+ has_text_item = False
41
+ chat_history[-1]["content"] = "".join(text_chunks)
42
+
43
+ try:
44
+ audio_chunk = next(audio_generator)
45
+ has_audion_item = True
46
+ except StopIteration:
47
+ audio_chunk = b""
48
+ has_audion_item = False
49
+
50
+ if has_text_item and not is_notes:
51
+ last_message = chat_display[-1][1]
52
+ last_message += text_chunk
53
+
54
+ split_notes = last_message.split("#NOTES#")
55
+ if len(split_notes) > 1:
56
+ is_notes = True
57
+ last_message = split_notes[0]
58
+ split_messages = last_message.split("\n\n")
59
+ chat_display[-1][1] = split_messages[0]
60
+ for m in split_messages[1:]:
61
+ chat_display.append([None, m])
62
+
63
+ if len(chat_display) - original_len > audio_created + has_text_item:
64
+ audio_generator = chain(audio_generator, tts.read_text(chat_display[original_len + audio_created][1]))
65
+ audio_created += 1
66
+ has_audion_item = True
67
+
68
+ yield chat_history, chat_display, code, audio_chunk
69
 
70
 
71
  def change_code_area(interview_type):
 
87
 
88
 
89
  def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
90
+ send_request_partial = partial(send_request, llm=llm, tts=tts)
91
+
92
  with gr.Tab("Interview", render=False, elem_id=f"tab") as problem_tab:
93
  chat_history = gr.State([])
94
  previous_code = gr.State("")
 
233
  send_btn.click(fn=add_candidate_message, inputs=[message, chat], outputs=[chat]).success(
234
  fn=lambda: None, outputs=[message]
235
  ).success(
236
+ fn=send_request_partial,
237
  inputs=[code, previous_code, chat_history, chat],
238
+ outputs=[chat_history, chat, previous_code, audio_output],
239
+ # ).success(
240
+ # fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
241
  ).success(
242
  fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
243
  ).success(
 
251
  outputs=[transcript, audio_buffer, message],
252
  show_progress="hidden",
253
  )
 
254
  else:
255
  audio_input.stop_recording(fn=stt.speech_to_text_full, inputs=[audio_input], outputs=[message]).success(
256
  fn=lambda: gr.update(interactive=True), outputs=[send_btn]