sohojoe committed on
Commit
32e9dda
·
1 Parent(s): 7a1cd88

refactor environment_state_actor.py -> responce_state_manager.py

Browse files
charles_app.py CHANGED
@@ -4,7 +4,7 @@ import time
4
  import asyncio
5
  import os
6
  from clip_transform import CLIPTransform
7
- from environment_state_actor import EnvironmentStateActor, EnvironmentState
8
  from respond_to_prompt_async import RespondToPromptAsync
9
  import asyncio
10
  import subprocess
@@ -13,14 +13,10 @@ class CharlesApp:
13
  def __init__(self):
14
  self._needs_init = True
15
  self._charles_actor_debug_output = ""
16
- self._environment_state:EnvironmentState = EnvironmentState(episode=0, step=0)
17
  self._state = "Initializing"
18
  self._clip_transform = CLIPTransform()
19
 
20
 
21
- def get_environment_state(self)->EnvironmentState:
22
- return self._environment_state
23
-
24
  def set_state(self, state, skip_print=False):
25
  self._state = state
26
  if not skip_print:
@@ -36,8 +32,8 @@ class CharlesApp:
36
  self._app_interface_actor = AppInterfaceActor.get_singleton()
37
  self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
38
 
39
- self.set_state("002 - creating EnvironmentStateActor")
40
- self._environment_state_actor = EnvironmentStateActor.remote()
41
 
42
  self.set_state("003 - creating PromptManager")
43
  from prompt_manager import PromptManager
@@ -102,8 +98,7 @@ class CharlesApp:
102
  has_spoken_for_this_prompt = False
103
 
104
  while True:
105
- env_state = await self._environment_state_actor.begin_next_step.remote()
106
- self._environment_state = env_state
107
  audio_frames = await self._app_interface_actor.dequeue_audio_input_frames_async.remote()
108
  video_frames = await self._app_interface_actor.dequeue_video_input_frames_async.remote()
109
 
@@ -151,13 +146,13 @@ class CharlesApp:
151
  if self._respond_to_prompt_task is not None:
152
  await self._respond_to_prompt.terminate()
153
  self._respond_to_prompt_task.cancel()
154
- self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
155
  self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run(prompt, self._prompt_manager.messages))
156
  additional_prompt = None
157
  previous_prompt = prompt
158
  is_talking = False
159
  has_spoken_for_this_prompt = False
160
- env_state = await self._environment_state_actor.reset_episode.remote()
161
  current_responses = []
162
  speech_chunks_per_response = []
163
  elif len(prompt) > 0 and prompt not in prompts_to_ignore:
@@ -171,23 +166,23 @@ class CharlesApp:
171
  self._respond_to_prompt_task.cancel()
172
  self._respond_to_prompt_task = None
173
  self._respond_to_prompt = None
174
- env_state = await self._environment_state_actor.reset_episode.remote()
175
  current_responses = []
176
  speech_chunks_per_response = []
177
  if additional_prompt is not None:
178
  prompt = additional_prompt + ". " + prompt
179
  human_preview_text = f"πŸ‘¨β“ {prompt}"
180
 
181
- for new_response in env_state.llm_responses:
182
  # add_debug_output(f"πŸ€– {new_response}")
183
  self._prompt_manager.append_assistant_message(new_response)
184
  current_responses.append(new_response)
185
  speech_chunks_per_response.append(0)
186
  robot_preview_text = ""
187
- if len(env_state.llm_preview):
188
- robot_preview_text = f"πŸ€–β“ {env_state.llm_preview}"
189
 
190
- for chunk in env_state.tts_raw_chunk_ids:
191
  chunk = json.loads(chunk)
192
  # prompt = chunk['prompt']
193
  response_id = chunk['llm_sentence_id']
 
4
  import asyncio
5
  import os
6
  from clip_transform import CLIPTransform
7
+ from responce_state_manager import ResponceStateManager
8
  from respond_to_prompt_async import RespondToPromptAsync
9
  import asyncio
10
  import subprocess
 
13
  def __init__(self):
14
  self._needs_init = True
15
  self._charles_actor_debug_output = ""
 
16
  self._state = "Initializing"
17
  self._clip_transform = CLIPTransform()
18
 
19
 
 
 
 
20
  def set_state(self, state, skip_print=False):
21
  self._state = state
22
  if not skip_print:
 
32
  self._app_interface_actor = AppInterfaceActor.get_singleton()
33
  self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
34
 
35
+ self.set_state("002 - creating ResponceStateManager")
36
+ self._responce_state_manager = ResponceStateManager()
37
 
38
  self.set_state("003 - creating PromptManager")
39
  from prompt_manager import PromptManager
 
98
  has_spoken_for_this_prompt = False
99
 
100
  while True:
101
+ responce_step = self._responce_state_manager.begin_next_step()
 
102
  audio_frames = await self._app_interface_actor.dequeue_audio_input_frames_async.remote()
103
  video_frames = await self._app_interface_actor.dequeue_video_input_frames_async.remote()
104
 
 
146
  if self._respond_to_prompt_task is not None:
147
  await self._respond_to_prompt.terminate()
148
  self._respond_to_prompt_task.cancel()
149
+ self._respond_to_prompt = RespondToPromptAsync(self._responce_state_manager, self._audio_output_queue)
150
  self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run(prompt, self._prompt_manager.messages))
151
  additional_prompt = None
152
  previous_prompt = prompt
153
  is_talking = False
154
  has_spoken_for_this_prompt = False
155
+ responce_step = self._responce_state_manager.reset_episode()
156
  current_responses = []
157
  speech_chunks_per_response = []
158
  elif len(prompt) > 0 and prompt not in prompts_to_ignore:
 
166
  self._respond_to_prompt_task.cancel()
167
  self._respond_to_prompt_task = None
168
  self._respond_to_prompt = None
169
+ responce_step = self._responce_state_manager.reset_episode()
170
  current_responses = []
171
  speech_chunks_per_response = []
172
  if additional_prompt is not None:
173
  prompt = additional_prompt + ". " + prompt
174
  human_preview_text = f"πŸ‘¨β“ {prompt}"
175
 
176
+ for new_response in responce_step.llm_responses:
177
  # add_debug_output(f"πŸ€– {new_response}")
178
  self._prompt_manager.append_assistant_message(new_response)
179
  current_responses.append(new_response)
180
  speech_chunks_per_response.append(0)
181
  robot_preview_text = ""
182
+ if len(responce_step.llm_preview):
183
+ robot_preview_text = f"πŸ€–β“ {responce_step.llm_preview}"
184
 
185
+ for chunk in responce_step.tts_raw_chunk_ids:
186
  chunk = json.loads(chunk)
187
  # prompt = chunk['prompt']
188
  response_id = chunk['llm_sentence_id']
environment_state_actor.py β†’ responce_state_manager.py RENAMED
@@ -1,7 +1,6 @@
1
- import ray
2
  from datetime import datetime
3
 
4
- class EnvironmentState:
5
  def __init__(self, episode, step):
6
  self.timestamp = datetime.utcnow()
7
  self.episode = episode
@@ -16,8 +15,7 @@ class EnvironmentState:
16
  return f'episode={self.episode}, step={self.step}, timestamp={self.timestamp}, \nreward={self.reward}\nstate=({state})'
17
 
18
 
19
- @ray.remote
20
- class EnvironmentStateActor:
21
  def __init__(self):
22
  self.episode = 0
23
  self.step = 0
@@ -27,13 +25,13 @@ class EnvironmentStateActor:
27
  def reset_episode(self):
28
  self.episode += 1
29
  self.step = 0
30
- self.state = EnvironmentState(self.episode, self.step)
31
  return self.state
32
 
33
- def begin_next_step(self)->EnvironmentState:
34
  previous_state = self.state
35
  self.step += 1
36
- self.state = EnvironmentState(self.episode, self.step)
37
  return previous_state
38
 
39
  def add_reward(self, reward):
@@ -49,5 +47,5 @@ class EnvironmentStateActor:
49
  def add_tts_raw_chunk_id(self, chunk_id):
50
  self.state.tts_raw_chunk_ids.append(chunk_id)
51
 
52
- def get_state(self)->EnvironmentState:
53
  return self.state
 
 
1
  from datetime import datetime
2
 
3
+ class ResponceStep:
4
  def __init__(self, episode, step):
5
  self.timestamp = datetime.utcnow()
6
  self.episode = episode
 
15
  return f'episode={self.episode}, step={self.step}, timestamp={self.timestamp}, \nreward={self.reward}\nstate=({state})'
16
 
17
 
18
+ class ResponceStateManager:
 
19
  def __init__(self):
20
  self.episode = 0
21
  self.step = 0
 
25
  def reset_episode(self):
26
  self.episode += 1
27
  self.step = 0
28
+ self.state = ResponceStep(self.episode, self.step)
29
  return self.state
30
 
31
+ def begin_next_step(self)->ResponceStep:
32
  previous_state = self.state
33
  self.step += 1
34
+ self.state = ResponceStep(self.episode, self.step)
35
  return previous_state
36
 
37
  def add_reward(self, reward):
 
47
  def add_tts_raw_chunk_id(self, chunk_id):
48
  self.state.tts_raw_chunk_ids.append(chunk_id)
49
 
50
+ def get_state(self)->ResponceStep:
51
  return self.state
respond_to_prompt_async.py CHANGED
@@ -6,7 +6,7 @@ import ray
6
  from chat_service import ChatService
7
  # from local_speaker_service import LocalSpeakerService
8
  from text_to_speech_service import TextToSpeechService
9
- from environment_state_actor import EnvironmentStateActor
10
  from ffmpeg_converter import FFMpegConverter
11
  from agent_response import AgentResponse
12
  import json
@@ -14,14 +14,14 @@ import json
14
  class RespondToPromptAsync:
15
  def __init__(
16
  self,
17
- environment_state_actor:EnvironmentStateActor,
18
  audio_output_queue):
19
  voice_id="2OviOUQc1JsQRQgNkVBj"
20
  self.llm_sentence_queue = Queue(maxsize=100)
21
  self.speech_chunk_queue = Queue(maxsize=100)
22
  self.voice_id = voice_id
23
  self.audio_output_queue = audio_output_queue
24
- self.environment_state_actor = environment_state_actor
25
  self.sentence_queues = []
26
  self.sentence_tasks = []
27
  # self.ffmpeg_converter = FFMpegConverter.remote(audio_output_queue)
@@ -36,12 +36,12 @@ class RespondToPromptAsync:
36
  is_complete_sentance = False
37
  if not is_complete_sentance:
38
  agent_response['llm_preview'] = text
39
- await self.environment_state_actor.set_llm_preview.remote(text)
40
  continue
41
  agent_response['llm_preview'] = ''
42
  agent_response['llm_sentence'] = text
43
  agent_response['llm_sentences'].append(text)
44
- await self.environment_state_actor.add_llm_response_and_clear_llm_preview.remote(text)
45
  print(f"{agent_response['llm_sentence']} id: {agent_response['llm_sentence_id']} from prompt: {agent_response['prompt']}")
46
  sentence_response = agent_response.make_copy()
47
  new_queue = Queue()
@@ -65,7 +65,7 @@ class RespondToPromptAsync:
65
  'chunk_count': chunk_count,
66
  }
67
  chunk_id_json = json.dumps(chunk_response)
68
- await self.environment_state_actor.add_tts_raw_chunk_id.remote(chunk_id_json)
69
  chunk_count += 1
70
 
71
  async def speech_to_converter(self):
 
6
  from chat_service import ChatService
7
  # from local_speaker_service import LocalSpeakerService
8
  from text_to_speech_service import TextToSpeechService
9
+ from responce_state_manager import ResponceStateManager
10
  from ffmpeg_converter import FFMpegConverter
11
  from agent_response import AgentResponse
12
  import json
 
14
  class RespondToPromptAsync:
15
  def __init__(
16
  self,
17
+ responce_state_manager:ResponceStateManager,
18
  audio_output_queue):
19
  voice_id="2OviOUQc1JsQRQgNkVBj"
20
  self.llm_sentence_queue = Queue(maxsize=100)
21
  self.speech_chunk_queue = Queue(maxsize=100)
22
  self.voice_id = voice_id
23
  self.audio_output_queue = audio_output_queue
24
+ self.responce_state_manager = responce_state_manager
25
  self.sentence_queues = []
26
  self.sentence_tasks = []
27
  # self.ffmpeg_converter = FFMpegConverter.remote(audio_output_queue)
 
36
  is_complete_sentance = False
37
  if not is_complete_sentance:
38
  agent_response['llm_preview'] = text
39
+ self.responce_state_manager.set_llm_preview(text)
40
  continue
41
  agent_response['llm_preview'] = ''
42
  agent_response['llm_sentence'] = text
43
  agent_response['llm_sentences'].append(text)
44
+ self.responce_state_manager.add_llm_response_and_clear_llm_preview(text)
45
  print(f"{agent_response['llm_sentence']} id: {agent_response['llm_sentence_id']} from prompt: {agent_response['prompt']}")
46
  sentence_response = agent_response.make_copy()
47
  new_queue = Queue()
 
65
  'chunk_count': chunk_count,
66
  }
67
  chunk_id_json = json.dumps(chunk_response)
68
+ self.responce_state_manager.add_tts_raw_chunk_id(chunk_id_json)
69
  chunk_count += 1
70
 
71
  async def speech_to_converter(self):
ui_app.py CHANGED
@@ -140,8 +140,6 @@ async def main():
140
  # continue
141
  # system_one_audio_status.write("Found Charles actor.")
142
  try:
143
- # new_environment_state = await charles_actor.get_environment_state.remote()
144
- # environment_state_ouput.markdown(f"{new_environment_state}")
145
  streamlit_av_queue.set_looking_listening(looking, listening)
146
  charles_debug_str = await app_interface_instance.get_debug_output.remote()
147
  charles_actor_debug_output.markdown(charles_debug_str)
 
140
  # continue
141
  # system_one_audio_status.write("Found Charles actor.")
142
  try:
 
 
143
  streamlit_av_queue.set_looking_listening(looking, listening)
144
  charles_debug_str = await app_interface_instance.get_debug_output.remote()
145
  charles_actor_debug_output.markdown(charles_debug_str)