Spaces:
Sleeping
Sleeping
got image to vector
Browse files
- charles_actor.py +14 -10
- streamlit_av_queue.py +4 -3
charles_actor.py
CHANGED
@@ -2,6 +2,7 @@ import ray
|
|
2 |
import time
|
3 |
import asyncio
|
4 |
import os
|
|
|
5 |
|
6 |
@ray.remote
|
7 |
class CharlesActor:
|
@@ -9,6 +10,7 @@ class CharlesActor:
|
|
9 |
self._needs_init = True
|
10 |
self._system_one_audio_history_output = ""
|
11 |
self._state = "Initializing"
|
|
|
12 |
|
13 |
def get_state(self):
|
14 |
return self._state
|
@@ -54,8 +56,11 @@ class CharlesActor:
|
|
54 |
|
55 |
self._state = "Waiting for input"
|
56 |
total_video_frames = 0
|
|
|
57 |
total_audio_frames = 0
|
58 |
loops = 0
|
|
|
|
|
59 |
|
60 |
process_speech_to_text_future = []
|
61 |
|
@@ -88,19 +93,18 @@ class CharlesActor:
|
|
88 |
self._system_one_audio_history_output = table_content
|
89 |
await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
97 |
|
98 |
-
# update debug output
|
99 |
-
if (total_video_frames >0 or total_audio_frames > 0):
|
100 |
-
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames"
|
101 |
await asyncio.sleep(0.01)
|
102 |
loops+=1
|
103 |
-
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}"
|
104 |
|
105 |
if __name__ == "__main__":
|
106 |
if not ray.is_initialized():
|
|
|
2 |
import time
|
3 |
import asyncio
|
4 |
import os
|
5 |
+
from clip_transform import CLIPTransform
|
6 |
|
7 |
@ray.remote
|
8 |
class CharlesActor:
|
|
|
10 |
self._needs_init = True
|
11 |
self._system_one_audio_history_output = ""
|
12 |
self._state = "Initializing"
|
13 |
+
self._clip_transform = CLIPTransform()
|
14 |
|
15 |
def get_state(self):
|
16 |
return self._state
|
|
|
56 |
|
57 |
self._state = "Waiting for input"
|
58 |
total_video_frames = 0
|
59 |
+
skipped_video_frames = 0
|
60 |
total_audio_frames = 0
|
61 |
loops = 0
|
62 |
+
start_time = time.time()
|
63 |
+
vector_debug = "--n/a--"
|
64 |
|
65 |
process_speech_to_text_future = []
|
66 |
|
|
|
93 |
self._system_one_audio_history_output = table_content
|
94 |
await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
|
95 |
|
96 |
+
video_frames = await self._streamlit_av_queue.get_video_frames_async()
|
97 |
+
if len(video_frames) > 0:
|
98 |
+
vector_debug = f"found {len(video_frames)} video frames"
|
99 |
+
total_video_frames += 1
|
100 |
+
skipped_video_frames += (len(video_frames) -1)
|
101 |
+
image_as_array = video_frames[-1]
|
102 |
+
last_frame_vector = self._clip_transform.image_to_embeddings(image_as_array)
|
103 |
+
vector_debug = f"Last frame vector: {last_frame_vector.shape}"
|
104 |
|
|
|
|
|
|
|
105 |
await asyncio.sleep(0.01)
|
106 |
loops+=1
|
107 |
+
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
|
108 |
|
109 |
if __name__ == "__main__":
|
110 |
if not ray.is_initialized():
|
streamlit_av_queue.py
CHANGED
@@ -27,13 +27,14 @@ class StreamlitAVQueue:
|
|
27 |
|
28 |
async def queued_video_frames_callback(
|
29 |
self,
|
30 |
-
frames: List[av.
|
31 |
-
) -> av.
|
32 |
try:
|
33 |
for frame in frames:
|
34 |
-
shared_tensor =
|
35 |
shared_tensor_ref = ray.put(shared_tensor)
|
36 |
await self.queue_actor.enqueue_in_video_frame.remote(shared_tensor_ref)
|
|
|
37 |
except Exception as e:
|
38 |
print (e)
|
39 |
return frames
|
|
|
27 |
|
28 |
async def queued_video_frames_callback(
|
29 |
self,
|
30 |
+
frames: List[av.VideoFrame],
|
31 |
+
) -> av.VideoFrame:
|
32 |
try:
|
33 |
for frame in frames:
|
34 |
+
shared_tensor = frame.to_ndarray(format="rgb24")
|
35 |
shared_tensor_ref = ray.put(shared_tensor)
|
36 |
await self.queue_actor.enqueue_in_video_frame.remote(shared_tensor_ref)
|
37 |
+
# print (f"tensor len: {len(shared_tensor)}, tensor shape: {shared_tensor.shape}, tensor type:{shared_tensor.dtype} tensor ref: {shared_tensor_ref}")
|
38 |
except Exception as e:
|
39 |
print (e)
|
40 |
return frames
|