init
Browse files- Dockerfile +8 -7
- app.py +94 -69
- infererence.py +4 -39
- requirements.txt +6 -1
- settings.py +3 -1
Dockerfile
CHANGED
@@ -4,15 +4,16 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|
4 |
|
5 |
WORKDIR /code
|
6 |
|
7 |
-
|
|
|
|
|
8 |
|
9 |
-
RUN
|
10 |
-
# RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
11 |
|
|
|
|
|
12 |
|
13 |
-
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
14 |
-
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
15 |
|
16 |
-
RUN cd esim_py &&
|
17 |
|
18 |
-
CMD ["python3", "app.py"]
|
|
|
4 |
|
5 |
WORKDIR /code
|
6 |
|
7 |
+
RUN apt-get update && apt-get install -y python3 python3-pip cmake python3-pybind11 libeigen3-dev python3-opencv \
|
8 |
+
libopencv-dev libboost-all-dev git libglfw3-dev libosmesa6-dev libgl1-mesa-dev wget mesa-utils vim \
|
9 |
+
&& rm -rf /var/lib/apt/lists/*
|
10 |
|
11 |
+
RUN pip3 install --no-cache-dir torch==2.1.2+cpu torchvision==0.16.2+cpu torchaudio==2.1.2+cpu --index-url https://download.pytorch.org/whl/cpu
|
|
|
12 |
|
13 |
+
COPY . .
|
14 |
+
RUN pip3 install --no-cache-dir -r requirements.txt
|
15 |
|
|
|
|
|
16 |
|
17 |
+
RUN cd esim_py && pip3 install .
|
18 |
|
19 |
+
CMD ["python3", "app.py"]
|
app.py
CHANGED
@@ -2,22 +2,30 @@ import gradio as gr
|
|
2 |
import os
|
3 |
import cv2
|
4 |
import numpy as np
|
|
|
|
|
|
|
|
|
5 |
|
6 |
import esim_py
|
7 |
from infererence import process_events, Ev2Hands
|
8 |
-
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH
|
9 |
-
|
10 |
|
11 |
-
os.makedirs("temp", exist_ok=True)
|
12 |
ev2hands = Ev2Hands()
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def get_frames(video_in, trim_in):
|
16 |
cap = cv2.VideoCapture(video_in)
|
17 |
|
18 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
19 |
stop_frame = int(trim_in * fps)
|
20 |
-
|
21 |
print("video fps: " + str(fps))
|
22 |
|
23 |
frames = []
|
@@ -41,25 +49,47 @@ def get_frames(video_in, trim_in):
|
|
41 |
return frames, fps
|
42 |
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
def infer(video_inp, trim_in, threshold):
|
|
|
|
|
|
|
46 |
frames, fps = get_frames(video_inp, trim_in)
|
47 |
ts_s = 1 / fps
|
48 |
ts_ns = ts_s * 1e9 # convert s to ns
|
49 |
|
50 |
POS_THRESHOLD = NEG_THRESHOLD = threshold
|
|
|
|
|
|
|
51 |
|
52 |
esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)
|
53 |
is_init = False
|
54 |
|
55 |
-
|
56 |
-
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
prediction_video = cv2.VideoWriter(prediction_vid_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
|
61 |
|
62 |
-
|
|
|
|
|
63 |
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
64 |
frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
|
65 |
|
@@ -73,88 +103,83 @@ def infer(video_inp, trim_in, threshold):
|
|
73 |
events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
|
74 |
data = process_events(events)
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
event_video.release()
|
84 |
-
prediction_video.release()
|
85 |
|
86 |
-
|
|
|
|
|
87 |
|
88 |
|
89 |
-
|
90 |
-
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
|
91 |
-
<div
|
92 |
-
style="
|
93 |
-
display: inline-flex;
|
94 |
-
align-items: center;
|
95 |
-
gap: 0.8rem;
|
96 |
-
font-size: 1.75rem;
|
97 |
-
"
|
98 |
-
>
|
99 |
-
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
|
100 |
-
Pix2Pix Video
|
101 |
-
</h1>
|
102 |
-
</div>
|
103 |
-
<p style="margin-bottom: 10px; font-size: 94%">
|
104 |
-
Apply Instruct Pix2Pix Diffusion to a video
|
105 |
-
</p>
|
106 |
-
</div>
|
107 |
-
"""
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
<
|
113 |
-
Examples by <a href="https://twitter.com/CitizenPlain" target="_blank">Nathan Shipley</a> •
|
114 |
-
Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates 🤗
|
115 |
-
</p>
|
116 |
-
</div>
|
117 |
-
<div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
|
118 |
-
<p>You may also like: </p>
|
119 |
-
<div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
|
120 |
-
|
121 |
-
<svg height="20" width="162" style="margin-left:4px;margin-bottom: 6px;">
|
122 |
-
<a href="https://huggingface.co/spaces/timbrooks/instruct-pix2pix" target="_blank">
|
123 |
-
<image href="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue" src="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue.png" height="20"/>
|
124 |
-
</a>
|
125 |
-
</svg>
|
126 |
-
|
127 |
</div>
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
-
with gr.Blocks(css='style.css') as demo:
|
134 |
with gr.Column(elem_id="col-container"):
|
135 |
-
gr.HTML(title)
|
136 |
with gr.Row():
|
137 |
with gr.Column():
|
138 |
video_inp = gr.Video(label="Video source", elem_id="input-vid")
|
139 |
with gr.Row():
|
140 |
trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
|
141 |
threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
with gr.Column():
|
144 |
event_frame_out = gr.Video(label="Event Frame", elem_id="video-output")
|
145 |
-
|
|
|
|
|
|
|
146 |
|
147 |
gr.HTML("""
|
148 |
-
<a style="display:inline-block" href="https://huggingface.co/spaces/
|
149 |
work with longer videos / skip the queue:
|
150 |
""", elem_id="duplicate-container")
|
151 |
|
152 |
submit_btn = gr.Button("Run Ev2Hands")
|
153 |
-
|
154 |
inputs = [video_inp, trim_in, threshold]
|
155 |
-
outputs = [event_frame_out, prediction_out]
|
156 |
-
gr.HTML(article)
|
157 |
-
|
158 |
-
submit_btn.click(infer, inputs, outputs)
|
159 |
|
|
|
|
|
|
|
160 |
demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)
|
|
|
2 |
import os
|
3 |
import cv2
|
4 |
import numpy as np
|
5 |
+
from tqdm import tqdm
|
6 |
+
from moviepy.editor import *
|
7 |
+
import tempfile
|
8 |
+
|
9 |
|
10 |
import esim_py
|
11 |
from infererence import process_events, Ev2Hands
|
12 |
+
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH
|
|
|
13 |
|
|
|
14 |
ev2hands = Ev2Hands()
|
15 |
|
16 |
|
17 |
+
def create_video(frames, fps, path):
    """Write `frames` out as a video file and hand back its location.

    Args:
        frames: Sequence of frames passed straight to ``ImageSequenceClip``
            (presumably HxWx3 uint8 arrays -- verify against the caller).
        fps: Frame rate used both to build the clip and to encode the file.
        path: Destination file path for the encoded video.

    Returns:
        The same `path` that was passed in, so the call can be used inline.
    """
    # Build the clip and encode it in one expression; the clip object is not
    # needed once the file has been written.
    ImageSequenceClip(frames, fps=fps).write_videofile(path, fps=fps)
    return path
|
21 |
+
|
22 |
+
|
23 |
def get_frames(video_in, trim_in):
|
24 |
cap = cv2.VideoCapture(video_in)
|
25 |
|
26 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
27 |
stop_frame = int(trim_in * fps)
|
28 |
+
|
29 |
print("video fps: " + str(fps))
|
30 |
|
31 |
frames = []
|
|
|
49 |
return frames, fps
|
50 |
|
51 |
|
52 |
+
def change_model(model_slider, files):
    """Select the mesh that corresponds to the chosen frame number.

    Args:
        model_slider: 1-based frame number coming from the "Frame Number"
            slider.
        files: Sequence of mesh file entries produced by a previous run, or
            ``None`` / empty when nothing has been generated yet.

    Returns:
        A ``(frame_number, mesh)`` tuple. ``frame_number`` is the slider value
        clamped into ``[1, len(files)]`` and ``mesh`` is the matching entry of
        `files`. ``(None, None)`` is returned when there are no files.
    """
    # An empty list must be handled like None: indexing it would raise
    # IndexError (the previous `is None` check let `[]` through).
    if not files:
        return None, None

    # Clamp on both sides. Without the lower bound a value below 1 produced a
    # negative index, which silently wrapped around to the last mesh.
    model_slider = max(1, min(model_slider, len(files)))

    # The slider is 1-based while the list is 0-based.
    idx = int(model_slider - 1)

    return model_slider, files[idx]
|
64 |
+
|
65 |
|
66 |
def infer(video_inp, trim_in, threshold):
|
67 |
+
if video_inp is None:
|
68 |
+
return None, None, None
|
69 |
+
|
70 |
frames, fps = get_frames(video_inp, trim_in)
|
71 |
ts_s = 1 / fps
|
72 |
ts_ns = ts_s * 1e9 # convert s to ns
|
73 |
|
74 |
POS_THRESHOLD = NEG_THRESHOLD = threshold
|
75 |
+
REF_PERIOD = 0
|
76 |
+
|
77 |
+
print(f'Threshold: {threshold}')
|
78 |
|
79 |
esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)
|
80 |
is_init = False
|
81 |
|
82 |
+
temp_folder = f'temp/{next(tempfile._get_candidate_names())}'
|
83 |
+
|
84 |
+
event_frame_vid_path = f'{temp_folder}/event_video.mp4'
|
85 |
+
mesh_folder = f'{temp_folder}/meshes'
|
86 |
|
87 |
+
os.makedirs(temp_folder, exist_ok=True)
|
88 |
+
os.makedirs(mesh_folder, exist_ok=True)
|
|
|
89 |
|
90 |
+
mesh_paths = list()
|
91 |
+
event_frames = list()
|
92 |
+
for idx, frame in enumerate(tqdm(frames)):
|
93 |
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
94 |
frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
|
95 |
|
|
|
103 |
events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
|
104 |
data = process_events(events)
|
105 |
|
106 |
+
mesh = ev2hands(data)
|
107 |
+
mesh_path = f'{mesh_folder}/{idx}.obj'
|
108 |
+
mesh.export(mesh_path)
|
109 |
+
mesh_paths.append(mesh_path)
|
110 |
|
111 |
+
event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
|
112 |
+
event_frames.append(event_frame)
|
|
|
|
|
|
|
113 |
|
114 |
+
create_video(event_frames, fps, event_frame_vid_path)
|
115 |
+
|
116 |
+
return event_frame_vid_path, mesh_paths, mesh_paths[0]
|
117 |
|
118 |
|
119 |
+
with gr.Blocks(css='style.css') as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
+
gr.Markdown(
|
122 |
+
"""
|
123 |
+
<div align="center">
|
124 |
+
<h1>Ev2Hands: 3D Pose Estimation of Two Interacting Hands from a Monocular Event Camera</h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
</div>
|
126 |
+
"""
|
127 |
+
)
|
128 |
+
gr.Markdown(
|
129 |
+
"""
|
130 |
+
<p align="center">
|
131 |
+
<a title="Project Page" href="https://4dqv.mpi-inf.mpg.de/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
|
132 |
+
<img src="https://img.shields.io/badge/Project-Website-5B7493?logo=googlechrome&logoColor=5B7493">
|
133 |
+
</a>
|
134 |
+
<a title="arXiv" href="https://arxiv.org/abs/2312.14157" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
|
135 |
+
<img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=b31b1b">
|
136 |
+
</a>
|
137 |
+
<a title="GitHub" href="https://github.com/Chris10M/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
|
138 |
+
<img src="https://img.shields.io/github/stars/Chris10M/Ev2Hands?label=GitHub%20%E2%98%85&&logo=github" alt="badge-github-stars">
|
139 |
+
</a>
|
140 |
+
<a title="Video" href="https://youtu.be/nvES_c5vRfU" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
|
141 |
+
<img src="https://img.shields.io/badge/YouTube-Video-red?logo=youtube&logoColor=red">
|
142 |
+
</a>
|
143 |
+
<a title="Visitor" href="https://hits.seeyoufarm.com" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
|
144 |
+
<img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fchris10%2Fev2hands&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false">
|
145 |
+
</a>
|
146 |
+
</p>
|
147 |
+
"""
|
148 |
+
)
|
149 |
|
|
|
150 |
with gr.Column(elem_id="col-container"):
|
151 |
+
# gr.HTML(title)
|
152 |
with gr.Row():
|
153 |
with gr.Column():
|
154 |
video_inp = gr.Video(label="Video source", elem_id="input-vid")
|
155 |
with gr.Row():
|
156 |
trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
|
157 |
threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5)
|
158 |
+
|
159 |
+
gr.Examples(
|
160 |
+
examples=[os.path.join(os.path.dirname(__file__), "examples/video.mp4")],
|
161 |
+
inputs=video_inp,
|
162 |
+
)
|
163 |
+
|
164 |
|
165 |
with gr.Column():
|
166 |
event_frame_out = gr.Video(label="Event Frame", elem_id="video-output")
|
167 |
+
|
168 |
+
files = gr.Files(visible=False, label='3D Mesh Files')
|
169 |
+
prediction_out = gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="Ev2Hands result")
|
170 |
+
model_slider = gr.Slider(minimum=1, step=1, label="Frame Number")
|
171 |
|
172 |
gr.HTML("""
|
173 |
+
<a style="display:inline-block" href="https://huggingface.co/spaces/chris10/ev2hands?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
|
174 |
work with longer videos / skip the queue:
|
175 |
""", elem_id="duplicate-container")
|
176 |
|
177 |
submit_btn = gr.Button("Run Ev2Hands")
|
178 |
+
|
179 |
inputs = [video_inp, trim_in, threshold]
|
180 |
+
outputs = [event_frame_out, files, prediction_out]
|
|
|
|
|
|
|
181 |
|
182 |
+
submit_btn.click(infer, inputs, outputs)
|
183 |
+
model_slider.change(change_model, [model_slider, files], [model_slider, prediction_out])
|
184 |
+
|
185 |
demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)
|
infererence.py
CHANGED
@@ -1,13 +1,9 @@
|
|
1 |
-
import sys
|
2 |
import os
|
3 |
os.environ['ERPC'] = '1'
|
4 |
|
5 |
-
import esim_py
|
6 |
-
|
7 |
import torch
|
8 |
import cv2
|
9 |
import time
|
10 |
-
import pyrender
|
11 |
import numpy as np
|
12 |
import trimesh
|
13 |
|
@@ -149,23 +145,6 @@ class Ev2Hands:
|
|
149 |
checkpoint = torch.load(save_path, map_location=device)
|
150 |
net.load_state_dict(checkpoint['state_dict'], strict=True)
|
151 |
|
152 |
-
renderer = pyrender.OffscreenRenderer(viewport_width=OUTPUT_WIDTH, viewport_height=OUTPUT_HEIGHT)
|
153 |
-
|
154 |
-
scene = pyrender.Scene(ambient_light=(0.3, 0.3, 0.3))
|
155 |
-
light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
|
156 |
-
light_pose = np.eye(4)
|
157 |
-
light_pose[:3, 3] = np.array([0, -1, 1])
|
158 |
-
scene.add(light, pose=light_pose)
|
159 |
-
light_pose[:3, 3] = np.array([0, 1, 1])
|
160 |
-
scene.add(light, pose=light_pose)
|
161 |
-
light_pose[:3, 3] = np.array([1, 1, 2])
|
162 |
-
scene.add(light, pose=light_pose)
|
163 |
-
|
164 |
-
camera = MAIN_CAMERA
|
165 |
-
nc = pyrender.Node(camera=camera, matrix=np.eye(4))
|
166 |
-
scene.add_node(nc)
|
167 |
-
|
168 |
-
|
169 |
rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
|
170 |
|
171 |
mano_hands = net.hands
|
@@ -173,17 +152,13 @@ class Ev2Hands:
|
|
173 |
self.net = net
|
174 |
self.device = device
|
175 |
self.mano_hands = mano_hands
|
176 |
-
self.rot = rot
|
177 |
-
|
178 |
-
self.scene = scene
|
179 |
-
|
180 |
def __call__(self, data):
|
181 |
net = self.net
|
182 |
device = self.device
|
183 |
mano_hands = self.mano_hands
|
184 |
rot = self.rot
|
185 |
-
renderer = self.renderer
|
186 |
-
scene = self.scene
|
187 |
|
188 |
frame = demo(net=net, device=device, data=data)[0]
|
189 |
seg_mask = frame['seg_mask']
|
@@ -198,15 +173,5 @@ class Ev2Hands:
|
|
198 |
|
199 |
pred_meshes = trimesh.util.concatenate(pred_meshes)
|
200 |
pred_meshes.apply_transform(rot)
|
201 |
-
|
202 |
-
|
203 |
-
mesh_node = pyrender.Node(mesh=pyrender.Mesh.from_trimesh(pred_meshes))
|
204 |
-
scene.add_node(mesh_node)
|
205 |
-
pred_rgb, depth = renderer.render(scene)
|
206 |
-
scene.remove_node(mesh_node)
|
207 |
-
|
208 |
-
pred_rgb = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
|
209 |
-
pred_rgb[pred_rgb == 255] = 0
|
210 |
-
|
211 |
-
return pred_rgb
|
212 |
-
|
|
|
|
|
1 |
import os
|
2 |
os.environ['ERPC'] = '1'
|
3 |
|
|
|
|
|
4 |
import torch
|
5 |
import cv2
|
6 |
import time
|
|
|
7 |
import numpy as np
|
8 |
import trimesh
|
9 |
|
|
|
145 |
checkpoint = torch.load(save_path, map_location=device)
|
146 |
net.load_state_dict(checkpoint['state_dict'], strict=True)
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
|
149 |
|
150 |
mano_hands = net.hands
|
|
|
152 |
self.net = net
|
153 |
self.device = device
|
154 |
self.mano_hands = mano_hands
|
155 |
+
self.rot = rot
|
156 |
+
|
|
|
|
|
157 |
def __call__(self, data):
|
158 |
net = self.net
|
159 |
device = self.device
|
160 |
mano_hands = self.mano_hands
|
161 |
rot = self.rot
|
|
|
|
|
162 |
|
163 |
frame = demo(net=net, device=device, data=data)[0]
|
164 |
seg_mask = frame['seg_mask']
|
|
|
173 |
|
174 |
pred_meshes = trimesh.util.concatenate(pred_meshes)
|
175 |
pred_meshes.apply_transform(rot)
|
176 |
+
|
177 |
+
return pred_meshes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -2,4 +2,9 @@ opencv-python
|
|
2 |
git+https://github.com/hassony2/manopth
|
3 |
pyrender
|
4 |
git+https://github.com/mattloper/chumpy.git
|
5 |
-
gradio
|
|
|
|
|
|
|
|
|
|
|
|
2 |
git+https://github.com/hassony2/manopth
|
3 |
pyrender
|
4 |
git+https://github.com/mattloper/chumpy.git
|
5 |
+
gradio
|
6 |
+
Pillow
|
7 |
+
pydantic
|
8 |
+
git+https://github.com/mmatl/pyopengl.git
|
9 |
+
moviepy
|
10 |
+
tqdm
|
settings.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import os
|
2 |
-
if os.name != 'nt':
|
|
|
|
|
3 |
|
4 |
import pyrender
|
5 |
import numpy as np
|
|
|
1 |
import os
|
2 |
+
if os.name != 'nt':
|
3 |
+
os.environ["PYOPENGL_PLATFORM"] = "egl"
|
4 |
+
|
5 |
|
6 |
import pyrender
|
7 |
import numpy as np
|