slight improvement on history
- app.py +2 -2
- charles_actor.py +47 -16
app.py
CHANGED

```diff
@@ -114,8 +114,8 @@ async def main():
             pass
         if charles_actor is not None:
             try:
-                new_environment_state = await charles_actor.get_environment_state.remote()
-                environment_state_ouput.markdown(f"{new_environment_state}")
+                # new_environment_state = await charles_actor.get_environment_state.remote()
+                # environment_state_ouput.markdown(f"{new_environment_state}")
                 charles_debug_str = await charles_actor.get_charles_actor_debug_output.remote()
                 charles_actor_debug_output.markdown(charles_debug_str)
             except Exception as e:
```
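The app.py change comments out the per-refresh environment-state fetch, leaving only the debug-output poll. For context, a minimal sketch of that polling pattern, with a hypothetical `DebugActor` and `get_debug_output` standing in for the real CharlesActor methods:

```python
import asyncio

import ray
import streamlit as st

ray.init(ignore_reinit_error=True)

@ray.remote
class DebugActor:
    """Hypothetical stand-in for the debug-output side of CharlesActor."""

    def __init__(self):
        self._output = "| Charles Actor debug history |\n| --- |\n"

    def get_debug_output(self) -> str:
        return self._output

async def poll_debug_output():
    actor = DebugActor.remote()
    placeholder = st.empty()
    while True:
        try:
            # Ray ObjectRefs are awaitable, so the remote call cooperates
            # with the rest of the asyncio event loop.
            debug_str = await actor.get_debug_output.remote()
            placeholder.markdown(debug_str)
        except Exception:
            pass  # keep the UI loop alive even if one poll fails
        await asyncio.sleep(0.1)
```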
charles_actor.py
CHANGED

```diff
@@ -68,12 +68,6 @@ class CharlesActor:
             table_content = "| Charles Actor debug history |\n| --- |\n"
             table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history)])
             self._charles_actor_debug_output = table_content
-        def preview_debug_output(output):
-            table_content = "| Charles Actor debug history |\n| --- |\n"
-            debug_output_history_copy = debug_output_history.copy()
-            debug_output_history_copy.append(output)
-            table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
-            self._charles_actor_debug_output = table_content
 
         self._state = "Waiting for input"
         total_video_frames = 0
@@ -84,6 +78,11 @@ class CharlesActor:
         vector_debug = "--n/a--"
 
         process_speech_to_text_future = []
+        current_responses = []
+        speech_chunks_per_response = []
+        human_preview_text = ""
+        robot_preview_text = ""
+
 
         while True:
             if len(self._debug_queue) > 0:
@@ -114,10 +113,6 @@ class CharlesActor:
                 distances, closest_item_key, distance_debug_str = self._prototypes.get_distances(image_vector)
                 vector_debug = f"{closest_item_key} {distance_debug_str}"
 
-
-            human_preview_text = ""
-            robot_preview_text = ""
-
             if len(process_speech_to_text_future) > 0:
                 ready, _ = ray.wait([process_speech_to_text_future[0]], timeout=0)
                 if ready:
@@ -128,22 +123,58 @@ class CharlesActor:
 
                     if speaker_finished and len(prompt) > 0 and prompt not in prompts_to_ignore:
                         print(f"Prompt: {prompt}")
-
+                        lines = []
+                        for i, response in enumerate(current_responses):
+                            line = "🤖 " if len(lines) == 0 else "... "
+                            line += f"{response} [{speech_chunks_per_response[i]}]"
+                            lines.append(line)
+                        for line in reversed(lines):
+                            add_debug_output(line)
                         add_debug_output(f"👨 {prompt}")
+                        current_responses = []
+                        speech_chunks_per_response = []
+                        env_state.llm_preview = ""
+                        env_state.llm_responses = []
+                        env_state.tts_raw_chunk_ids = []
+                        human_preview_text = ""
+                        robot_preview_text = ""
                         await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
                     elif len(prompt) > 0 and prompt not in prompts_to_ignore:
                         human_preview_text = f"👨❓ {prompt}"
 
             for new_response in env_state.llm_responses:
-                add_debug_output(f"🤖 {new_response}")
+                # add_debug_output(f"🤖 {new_response}")
+                current_responses.append(new_response)
+                speech_chunks_per_response.append(0)
+                robot_preview_text = ""
             if len(env_state.llm_preview):
                 robot_preview_text = f"🤖❓ {env_state.llm_preview}"
 
+            for chunk in env_state.tts_raw_chunk_ids:
+                chunk = json.loads(chunk)
+                # prompt = chunk['prompt']
+                response_id = chunk['llm_sentence_id']
+                speech_chunks_per_response[response_id] += 1
+
+            table_content = "| Charles Actor debug history |\n| --- |\n"
+            debug_output_history_copy = debug_output_history.copy()
+            if len(robot_preview_text) > 0:
+                debug_output_history_copy.append(robot_preview_text)
+            lines = []
+            for i, response in enumerate(current_responses):
+                line = "🤖 " if len(lines) == 0 else "... "
+                line += f"{response} [{speech_chunks_per_response[i]}]"
+                lines.append(line)
+            for line in reversed(lines):
+                debug_output_history_copy.append(line)
             if len(human_preview_text) > 0:
-
-
-
-
+                debug_output_history_copy.append(human_preview_text)
+            if len(debug_output_history_copy) > 10:
+                debug_output_history_copy.pop(0)
+            table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
+            self._charles_actor_debug_output = table_content
+
+
             await asyncio.sleep(0.01)
             loops+=1
             self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
```
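The `[{n}]` suffix attached to each response line above is a running count of TTS audio chunks per LLM sentence. A minimal sketch of that tally, assuming (as the diff reads them) that each entry in `tts_raw_chunk_ids` is a JSON string carrying an `llm_sentence_id` field:

```python
import json

# Hypothetical sample: three TTS chunks attributed to two LLM sentences.
tts_raw_chunk_ids = [
    '{"llm_sentence_id": 0}',
    '{"llm_sentence_id": 0}',
    '{"llm_sentence_id": 1}',
]

# One counter per response seen so far; the main loop appends a 0
# each time a new entry arrives in env_state.llm_responses.
speech_chunks_per_response = [0, 0]

for chunk in tts_raw_chunk_ids:
    chunk = json.loads(chunk)
    response_id = chunk["llm_sentence_id"]
    speech_chunks_per_response[response_id] += 1

print(speech_chunks_per_response)  # [2, 1]
```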
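Taken together, the new per-loop block replaces the deleted `preview_debug_output` helper: every iteration it rebuilds the markdown history table, appending the in-flight robot responses (with their chunk counts) and any human preview after the committed history. A self-contained sketch of that rendering, with hypothetical sample data standing in for the real `debug_output_history` and preview state:

```python
def render_debug_table(history, current_responses, speech_chunks_per_response,
                       robot_preview_text="", human_preview_text="", max_rows=10):
    """Build the one-column markdown table shown in the UI, newest row first."""
    table_content = "| Charles Actor debug history |\n| --- |\n"
    rows = history.copy()
    if len(robot_preview_text) > 0:
        rows.append(robot_preview_text)
    # First in-progress response gets the 🤖 prefix, continuations get "...",
    # and each carries its TTS chunk count in brackets.
    lines = []
    for i, response in enumerate(current_responses):
        line = "🤖 " if len(lines) == 0 else "... "
        line += f"{response} [{speech_chunks_per_response[i]}]"
        lines.append(line)
    # Appended reversed here, then reversed again for display, so a
    # multi-sentence response still reads top to bottom.
    for line in reversed(lines):
        rows.append(line)
    if len(human_preview_text) > 0:
        rows.append(human_preview_text)
    if len(rows) > max_rows:
        rows.pop(0)  # drop the oldest row to keep the table bounded
    table_content += "\n".join([f"| {item} |" for item in reversed(rows)])
    return table_content

print(render_debug_table(
    history=["👨 hello there"],
    current_responses=["Hi!", "How can I help?"],
    speech_chunks_per_response=[2, 1],
    human_preview_text="👨❓ what can you d",
))
```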