refactor environment_state_actor.py -> responce_state_manager.py
- charles_app.py +11 -16
- environment_state_actor.py → responce_state_manager.py +6 -8
- respond_to_prompt_async.py +6 -6
- ui_app.py +0 -2
charles_app.py
CHANGED
@@ -4,7 +4,7 @@ import time
 import asyncio
 import os
 from clip_transform import CLIPTransform
-from environment_state_actor import EnvironmentStateActor, EnvironmentState
+from responce_state_manager import ResponceStateManager
 from respond_to_prompt_async import RespondToPromptAsync
 import asyncio
 import subprocess
@@ -13,14 +13,10 @@ class CharlesApp:
     def __init__(self):
         self._needs_init = True
         self._charles_actor_debug_output = ""
-        self._environment_state:EnvironmentState = EnvironmentState(episode=0, step=0)
         self._state = "Initializing"
         self._clip_transform = CLIPTransform()


-    def get_environment_state(self)->EnvironmentState:
-        return self._environment_state
-
     def set_state(self, state, skip_print=False):
         self._state = state
         if not skip_print:
@@ -36,8 +32,8 @@ class CharlesApp:
         self._app_interface_actor = AppInterfaceActor.get_singleton()
         self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()

-        self.set_state("002 - creating EnvironmentStateActor")
-        self._environment_state_actor = EnvironmentStateActor.remote()
+        self.set_state("002 - creating ResponceStateManager")
+        self._responce_state_manager = ResponceStateManager()

         self.set_state("003 - creating PromptManager")
         from prompt_manager import PromptManager
@@ -102,8 +98,7 @@ class CharlesApp:
         has_spoken_for_this_prompt = False

         while True:
-            env_state = await self._environment_state_actor.begin_next_step.remote()
-            self._environment_state = env_state
+            responce_step = self._responce_state_manager.begin_next_step()
             audio_frames = await self._app_interface_actor.dequeue_audio_input_frames_async.remote()
             video_frames = await self._app_interface_actor.dequeue_video_input_frames_async.remote()

@@ -151,13 +146,13 @@ class CharlesApp:
                 if self._respond_to_prompt_task is not None:
                     await self._respond_to_prompt.terminate()
                     self._respond_to_prompt_task.cancel()
-                self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
+                self._respond_to_prompt = RespondToPromptAsync(self._responce_state_manager, self._audio_output_queue)
                 self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run(prompt, self._prompt_manager.messages))
                 additional_prompt = None
                 previous_prompt = prompt
                 is_talking = False
                 has_spoken_for_this_prompt = False
-                env_state = await self._environment_state_actor.reset_episode.remote()
+                responce_step = self._responce_state_manager.reset_episode()
                 current_responses = []
                 speech_chunks_per_response = []
             elif len(prompt) > 0 and prompt not in prompts_to_ignore:
@@ -171,23 +166,23 @@ class CharlesApp:
                     self._respond_to_prompt_task.cancel()
                     self._respond_to_prompt_task = None
                     self._respond_to_prompt = None
-                    env_state = await self._environment_state_actor.reset_episode.remote()
+                    responce_step = self._responce_state_manager.reset_episode()
                     current_responses = []
                     speech_chunks_per_response = []
                     if additional_prompt is not None:
                         prompt = additional_prompt + ". " + prompt
                     human_preview_text = f"👨❓ {prompt}"

-            for new_response in env_state.llm_responses:
+            for new_response in responce_step.llm_responses:
                 # add_debug_output(f"🤖 {new_response}")
                 self._prompt_manager.append_assistant_message(new_response)
                 current_responses.append(new_response)
                 speech_chunks_per_response.append(0)
             robot_preview_text = ""
-            if len(env_state.llm_preview):
-                robot_preview_text = f"🤖❓ {env_state.llm_preview}"
+            if len(responce_step.llm_preview):
+                robot_preview_text = f"🤖❓ {responce_step.llm_preview}"

-            for chunk in env_state.tts_raw_chunk_ids:
+            for chunk in responce_step.tts_raw_chunk_ids:
                 chunk = json.loads(chunk)
                 # prompt = chunk['prompt']
                 response_id = chunk['llm_sentence_id']
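Since ResponceStateManager is now a plain in-process object rather than a Ray actor, the main loop polls it without await/.remote(). A minimal sketch of that pattern (the loop scaffolding is hypothetical; only ResponceStateManager, begin_next_step(), and the ResponceStep fields come from this commit):

import asyncio
from responce_state_manager import ResponceStateManager

async def main_loop(manager: ResponceStateManager):
    while True:
        # begin_next_step() hands back the step that just finished filling
        # and opens a fresh ResponceStep for the producers to write into.
        responce_step = manager.begin_next_step()
        for new_response in responce_step.llm_responses:
            print(f"🤖 {new_response}")
        if len(responce_step.llm_preview):
            print(f"🤖❓ {responce_step.llm_preview}")
        await asyncio.sleep(0.01)  # yield so producer tasks can run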
environment_state_actor.py → responce_state_manager.py
RENAMED
@@ -1,7 +1,6 @@
-import ray
 from datetime import datetime

-class EnvironmentState:
+class ResponceStep:
     def __init__(self, episode, step):
         self.timestamp = datetime.utcnow()
         self.episode = episode
@@ -16,8 +15,7 @@ class EnvironmentState:
         return f'episode={self.episode}, step={self.step}, timestamp={self.timestamp}, \nreward={self.reward}\nstate=({state})'


-@ray.remote
-class EnvironmentStateActor:
+class ResponceStateManager:
     def __init__(self):
         self.episode = 0
         self.step = 0
@@ -27,13 +25,13 @@ class EnvironmentStateActor:
     def reset_episode(self):
         self.episode += 1
         self.step = 0
-        self.state = EnvironmentState(self.episode, self.step)
+        self.state = ResponceStep(self.episode, self.step)
         return self.state

-    def begin_next_step(self)->EnvironmentState:
+    def begin_next_step(self)->ResponceStep:
         previous_state = self.state
         self.step += 1
-        self.state = EnvironmentState(self.episode, self.step)
+        self.state = ResponceStep(self.episode, self.step)
         return previous_state

     def add_reward(self, reward):
@@ -49,5 +47,5 @@ class EnvironmentStateActor:
     def add_tts_raw_chunk_id(self, chunk_id):
         self.state.tts_raw_chunk_ids.append(chunk_id)

-    def get_state(self)->EnvironmentState:
+    def get_state(self)->ResponceStep:
         return self.state
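The manager double-buffers: producers accumulate into the current ResponceStep, and begin_next_step() swaps in a fresh one while returning the finished step. A usage sketch, assuming ResponceStep initializes llm_preview, llm_responses, and tts_raw_chunk_ids in its constructor and that the setter methods write to the current step (those bodies fall outside the hunks shown above):

from responce_state_manager import ResponceStateManager

manager = ResponceStateManager()
manager.reset_episode()                # open episode 1, step 0
manager.set_llm_preview("Hello, wor")  # partial sentence streams in
manager.add_llm_response_and_clear_llm_preview("Hello, world.")

finished = manager.begin_next_step()   # returns the step just filled
print(finished.llm_responses)          # expected: ['Hello, world.']
print(manager.get_state().step)        # 1 -- a fresh ResponceStep is live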
respond_to_prompt_async.py
CHANGED
@@ -6,7 +6,7 @@ import ray
 from chat_service import ChatService
 # from local_speaker_service import LocalSpeakerService
 from text_to_speech_service import TextToSpeechService
-from environment_state_actor import EnvironmentStateActor
+from responce_state_manager import ResponceStateManager
 from ffmpeg_converter import FFMpegConverter
 from agent_response import AgentResponse
 import json
@@ -14,14 +14,14 @@ import json
 class RespondToPromptAsync:
     def __init__(
             self,
-            environment_state_actor:EnvironmentStateActor,
+            responce_state_manager:ResponceStateManager,
             audio_output_queue):
         voice_id="2OviOUQc1JsQRQgNkVBj"
         self.llm_sentence_queue = Queue(maxsize=100)
         self.speech_chunk_queue = Queue(maxsize=100)
         self.voice_id = voice_id
         self.audio_output_queue = audio_output_queue
-        self.environment_state_actor = environment_state_actor
+        self.responce_state_manager = responce_state_manager
         self.sentence_queues = []
         self.sentence_tasks = []
         # self.ffmpeg_converter = FFMpegConverter.remote(audio_output_queue)
@@ -36,12 +36,12 @@ class RespondToPromptAsync:
             is_complete_sentance = False
             if not is_complete_sentance:
                 agent_response['llm_preview'] = text
-                self.environment_state_actor.set_llm_preview.remote(text)
+                self.responce_state_manager.set_llm_preview(text)
                 continue
             agent_response['llm_preview'] = ''
             agent_response['llm_sentence'] = text
             agent_response['llm_sentences'].append(text)
-            self.environment_state_actor.add_llm_response_and_clear_llm_preview.remote(text)
+            self.responce_state_manager.add_llm_response_and_clear_llm_preview(text)
             print(f"{agent_response['llm_sentence']} id: {agent_response['llm_sentence_id']} from prompt: {agent_response['prompt']}")
             sentence_response = agent_response.make_copy()
             new_queue = Queue()
@@ -65,7 +65,7 @@ class RespondToPromptAsync:
                     'chunk_count': chunk_count,
                 }
                 chunk_id_json = json.dumps(chunk_response)
-                self.environment_state_actor.add_tts_raw_chunk_id.remote(chunk_id_json)
+                self.responce_state_manager.add_tts_raw_chunk_id(chunk_id_json)
                 chunk_count += 1

     async def speech_to_converter(self):
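On the producer side the actor handles become plain synchronous method calls. A sketch of the sentence-streaming pattern the diff implies (the token loop and end-of-sentence test are stand-ins, not the app's real logic):

from responce_state_manager import ResponceStateManager

def stream_tokens(manager: ResponceStateManager, tokens):
    text = ""
    for token in tokens:
        text += token
        if not text.endswith((".", "!", "?")):  # crude stand-in test
            manager.set_llm_preview(text)       # publish partial sentence
            continue
        manager.add_llm_response_and_clear_llm_preview(text)
        text = ""

manager = ResponceStateManager()
manager.reset_episode()
stream_tokens(manager, ["Hello", ", world", "."])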
ui_app.py
CHANGED
@@ -140,8 +140,6 @@ async def main():
             # continue
             # system_one_audio_status.write("Found Charles actor.")
         try:
-            # new_environment_state = await charles_actor.get_environment_state.remote()
-            # environment_state_ouput.markdown(f"{new_environment_state}")
             streamlit_av_queue.set_looking_listening(looking, listening)
             charles_debug_str = await app_interface_instance.get_debug_output.remote()
             charles_actor_debug_output.markdown(charles_debug_str)