Spaces:

sohojoe
/

project_charles

Sleeping

App Files Files Community

sohojoe committed on Sep 10, 2023

Commit

795c382

•

1 Parent(s): 937fa8f

handle false signal of speaker_finished

Browse files

Files changed (3) hide show

charles_actor.py +20 -1
respond_to_prompt_actor.py +2 -1
webrtc_av_queue_actor.py +1 -0

charles_actor.py CHANGED Viewed

@@ -100,6 +100,10 @@ class CharlesActor:
         speech_chunks_per_response = []
         human_preview_text = ""
         robot_preview_text = ""
         while True:
@@ -148,7 +152,6 @@ class CharlesActor:
                             line += f"[{speech_chunks_per_response[i]}] {response}  \n"
                         if len(line) > 0:
                             add_debug_output(line)
-                        add_debug_output(f"👨 {prompt}")
                         current_responses = []
                         speech_chunks_per_response = []
                         env_state.llm_preview = ""
@@ -156,8 +159,23 @@ class CharlesActor:
                         env_state.tts_raw_chunk_ids = []
                         human_preview_text = ""
                         robot_preview_text = ""
                         await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
                     elif len(prompt) > 0 and prompt not in prompts_to_ignore:
                         human_preview_text = f"👨❓ {prompt}"
             for new_response in env_state.llm_responses:
@@ -195,6 +213,7 @@ class CharlesActor:
             # add observations to the environment state
             count = len(self._out_audio_queue)
             is_talking = bool(count > 0)
             frame = self._animator.update(is_talking)
             if self._out_video_queue.full():
                 evicted_item = await self._out_video_queue.get_async()

         speech_chunks_per_response = []
         human_preview_text = ""
         robot_preview_text = ""
+        additional_prompt = None
+        previous_prompt = ""
+        is_talking = False
+        has_spoken_for_this_prompt = False
         while True:
                             line += f"[{speech_chunks_per_response[i]}] {response}  \n"
                         if len(line) > 0:
                             add_debug_output(line)
                         current_responses = []
                         speech_chunks_per_response = []
                         env_state.llm_preview = ""
                         env_state.tts_raw_chunk_ids = []
                         human_preview_text = ""
                         robot_preview_text = ""
+                        if additional_prompt is not None:
+                            prompt = additional_prompt + ". " + prompt
+                        add_debug_output(f"👨 {prompt}")
                         await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
+                        additional_prompt = None
+                        previous_prompt = prompt
+                        is_talking = False
+                        has_spoken_for_this_prompt = False
                     elif len(prompt) > 0 and prompt not in prompts_to_ignore:
+                        # sometimes we get a false signal of speaker_finsihed
+                        # in which case we get new prompts preview before we have spoken
+                        if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
+                            additional_prompt = previous_prompt
+                            has_spoken_for_this_prompt = True
+                            await self._respond_to_prompt_actor.enqueue_prompt.remote("")
+                        if additional_prompt is not None:
+                            prompt = additional_prompt + ". " + prompt
                         human_preview_text = f"👨❓ {prompt}"
             for new_response in env_state.llm_responses:
             # add observations to the environment state
             count = len(self._out_audio_queue)
             is_talking = bool(count > 0)
+            has_spoken_for_this_prompt = has_spoken_for_this_prompt or is_talking
             frame = self._animator.update(is_talking)
             if self._out_video_queue.full():
                 evicted_item = await self._out_video_queue.get_async()

respond_to_prompt_actor.py CHANGED Viewed

@@ -184,6 +184,7 @@ class RespondToPromptActor:
             speech_output_future,
             ffmpeg_converter_future,
         )
-        await self.prompt_queue.put_async(prompt)
         print("Enqueued prompt")

             speech_output_future,
             ffmpeg_converter_future,
         )
+        if len(prompt) > 0: # handles case where we just want to flush
+            await self.prompt_queue.put_async(prompt)
         print("Enqueued prompt")

webrtc_av_queue_actor.py CHANGED Viewed

@@ -59,6 +59,7 @@ class WebRtcAVQueueActor:
     async def get_out_video_frame(self):
         """Return the most recently queued outgoing video frame, or None.

         Drains ``self.out_video_queue`` and returns only the last item
         retrieved; every earlier item taken off the queue is discarded.
         Returns ``None`` when the queue reports empty on entry.
         """
         # Fast path: nothing queued, so there is no frame to hand back.
         if self.out_video_queue.empty():
             return None
         # Keep pulling until the queue reports empty; each iteration
         # overwrites `frame`, so only the last item retrieved survives.
         while not self.out_video_queue.empty():
             frame = await self.out_video_queue.get_async()
         # NOTE(review): `frame` is bound only inside the loop above. If the
         # queue were emptied between the first `empty()` check and the loop
         # condition, this line would raise UnboundLocalError. Whether that
         # can happen depends on the queue implementation and any other
         # consumers, which are not visible here -- confirm.
         return frame

     async def get_out_video_frame(self):
         if self.out_video_queue.empty():
             return None
+        frame = None
         while not self.out_video_queue.empty():
             frame = await self.out_video_queue.get_async()
         return frame