chris10 committed
Commit d965e49
1 Parent(s): 9564652
Files changed (5)
  1. Dockerfile +8 -7
  2. app.py +94 -69
  3. infererence.py +4 -39
  4. requirements.txt +6 -1
  5. settings.py +3 -1
Dockerfile CHANGED
@@ -4,15 +4,16 @@ ENV DEBIAN_FRONTEND=noninteractive
 
  WORKDIR /code
 
- COPY . .
-
- RUN apt-get update && apt-get install -y python3 python3-pip cmake python3-pybind11 python3-opencv libopencv-dev libboost-all-dev git libglfw3-dev libgles2-mesa-dev
- # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-
- RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
-
- RUN cd esim_py && pip install .
-
- CMD ["python3", "app.py"]
+ RUN apt-get update && apt-get install -y python3 python3-pip cmake python3-pybind11 libeigen3-dev python3-opencv \
+     libopencv-dev libboost-all-dev git libglfw3-dev libosmesa6-dev libgl1-mesa-dev wget mesa-utils vim \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN pip3 install --no-cache-dir torch==2.1.2+cpu torchvision==0.16.2+cpu torchaudio==2.1.2+cpu --index-url https://download.pytorch.org/whl/cpu
+
+ COPY . .
+ RUN pip3 install --no-cache-dir -r requirements.txt
+
+ RUN cd esim_py && pip3 install .
+
+ CMD ["python3", "app.py"]
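
The new Dockerfile pins CPU-only PyTorch wheels and installs dependencies before COPY . ., so source edits no longer invalidate the cached dependency layers. A minimal sanity check inside the built container (a sketch, assuming the pinned wheels installed as written):

    import torch
    import torchvision

    # The Dockerfile pins CPU-only builds, so CUDA should be unavailable.
    print(torch.__version__, torchvision.__version__)  # expected: 2.1.2+cpu 0.16.2+cpu
    print(torch.cuda.is_available())                    # expected: False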
app.py CHANGED
@@ -2,22 +2,30 @@ import gradio as gr
  import os
  import cv2
  import numpy as np
+ from tqdm import tqdm
+ from moviepy.editor import *
+ import tempfile
+
 
  import esim_py
  from infererence import process_events, Ev2Hands
- from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH, REF_PERIOD
+ from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH
 
- os.makedirs("temp", exist_ok=True)
  ev2hands = Ev2Hands()
 
 
+ def create_video(frames, fps, path):
+     clip = ImageSequenceClip(frames, fps=fps)
+     clip.write_videofile(path, fps=fps)
+     return path
+
+
  def get_frames(video_in, trim_in):
      cap = cv2.VideoCapture(video_in)
 
      fps = cap.get(cv2.CAP_PROP_FPS)
      stop_frame = int(trim_in * fps)
 
      print("video fps: " + str(fps))
 
      frames = []
@@ -41,25 +49,47 @@ def get_frames(video_in, trim_in):
      return frames, fps
 
 
+ def change_model(model_slider, files):
+     if files is None:
+         return None, None
+
+     if model_slider >= len(files):
+         model_slider = len(files)
+
+     idx = int(model_slider - 1)
+
+     mesh_path = files[idx]
+
+     return model_slider, mesh_path
+
 
  def infer(video_inp, trim_in, threshold):
+     if video_inp is None:
+         return None, None, None
+
      frames, fps = get_frames(video_inp, trim_in)
      ts_s = 1 / fps
      ts_ns = ts_s * 1e9 # convert s to ns
 
      POS_THRESHOLD = NEG_THRESHOLD = threshold
+     REF_PERIOD = 0
+
+     print(f'Threshold: {threshold}')
 
      esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)
      is_init = False
 
-     event_frame_vid_path = 'temp/event_video.mp4'
-     prediction_vid_path = 'temp/prediction_video.mp4'
-
-     height, width, _ = frames[0].shape
-     event_video = cv2.VideoWriter(event_frame_vid_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
-     prediction_video = cv2.VideoWriter(prediction_vid_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
-
-     for idx, frame in enumerate(frames):
+     temp_folder = f'temp/{next(tempfile._get_candidate_names())}'
+
+     event_frame_vid_path = f'{temp_folder}/event_video.mp4'
+     mesh_folder = f'{temp_folder}/meshes'
+
+     os.makedirs(temp_folder, exist_ok=True)
+     os.makedirs(mesh_folder, exist_ok=True)
+
+     mesh_paths = list()
+     event_frames = list()
+     for idx, frame in enumerate(tqdm(frames)):
          frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
          frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
 
@@ -73,88 +103,83 @@ def infer(video_inp, trim_in, threshold):
          events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
          data = process_events(events)
 
-         prediction_frame = ev2hands(data)
-         event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
-
-         event_video.write(event_frame)
-         prediction_video.write(prediction_frame)
-
-     event_video.release()
-     prediction_video.release()
-
-     return event_frame_vid_path, prediction_vid_path
-
-
- title = """
-     <div style="text-align: center; max-width: 700px; margin: 0 auto;">
-         <div
-             style="
-                 display: inline-flex;
-                 align-items: center;
-                 gap: 0.8rem;
-                 font-size: 1.75rem;
-             "
-         >
-             <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
-                 Pix2Pix Video
-             </h1>
-         </div>
-         <p style="margin-bottom: 10px; font-size: 94%">
-             Apply Instruct Pix2Pix Diffusion to a video
-         </p>
-     </div>
- """
-
- article = """
-     <div class="footer">
-         <p>
-             Examples by <a href="https://twitter.com/CitizenPlain" target="_blank">Nathan Shipley</a> •&nbsp;
-             Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates 🤗
-         </p>
-     </div>
-     <div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
-         <p>You may also like: </p>
-         <div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
-             <svg height="20" width="162" style="margin-left:4px;margin-bottom: 6px;">
-                 <a href="https://huggingface.co/spaces/timbrooks/instruct-pix2pix" target="_blank">
-                     <image href="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue" src="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue.png" height="20"/>
-                 </a>
-             </svg>
-         </div>
-     </div>
- """
+         mesh = ev2hands(data)
+         mesh_path = f'{mesh_folder}/{idx}.obj'
+         mesh.export(mesh_path)
+         mesh_paths.append(mesh_path)
+
+         event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
+         event_frames.append(event_frame)
+
+     create_video(event_frames, fps, event_frame_vid_path)
+
+     return event_frame_vid_path, mesh_paths, mesh_paths[0]
 
 
  with gr.Blocks(css='style.css') as demo:
+     gr.Markdown(
+         """
+         <div align="center">
+             <h1>Ev2Hands: 3D Pose Estimation of Two Interacting Hands from a Monocular Event Camera</h1>
+         </div>
+         """
+     )
+     gr.Markdown(
+         """
+         <p align="center">
+             <a title="Project Page" href="https://4dqv.mpi-inf.mpg.de/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/badge/Project-Website-5B7493?logo=googlechrome&logoColor=5B7493">
+             </a>
+             <a title="arXiv" href="https://arxiv.org/abs/2312.14157" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=b31b1b">
+             </a>
+             <a title="GitHub" href="https://github.com/Chris10M/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/github/stars/Chris10M/Ev2Hands?label=GitHub%20%E2%98%85&&logo=github" alt="badge-github-stars">
+             </a>
+             <a title="Video" href="https://youtu.be/nvES_c5vRfU" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/badge/YouTube-Video-red?logo=youtube&logoColor=red">
+             </a>
+             <a title="Visitor" href="https://hits.seeyoufarm.com" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fchris10%2Fev2hands&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false">
+             </a>
+         </p>
+         """
+     )
 
      with gr.Column(elem_id="col-container"):
-         gr.HTML(title)
+         # gr.HTML(title)
          with gr.Row():
              with gr.Column():
                  video_inp = gr.Video(label="Video source", elem_id="input-vid")
                  with gr.Row():
                      trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
                      threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5)
+
+                 gr.Examples(
+                     examples=[os.path.join(os.path.dirname(__file__), "examples/video.mp4")],
+                     inputs=video_inp,
+                 )
 
              with gr.Column():
                  event_frame_out = gr.Video(label="Event Frame", elem_id="video-output")
-                 prediction_out = gr.Video(label="Ev2Hands result", elem_id="video-output")
+
+                 files = gr.Files(visible=False, label='3D Mesh Files')
+                 prediction_out = gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="Ev2Hands result")
+                 model_slider = gr.Slider(minimum=1, step=1, label="Frame Number")
 
          gr.HTML("""
-             <a style="display:inline-block" href="https://huggingface.co/spaces/fffiloni/Pix2Pix-Video?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
+             <a style="display:inline-block" href="https://huggingface.co/spaces/chris10/ev2hands?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
              work with longer videos / skip the queue:
          """, elem_id="duplicate-container")
 
          submit_btn = gr.Button("Run Ev2Hands")
 
          inputs = [video_inp, trim_in, threshold]
-         outputs = [event_frame_out, prediction_out]
-         gr.HTML(article)
-
-         submit_btn.click(infer, inputs, outputs)
+         outputs = [event_frame_out, files, prediction_out]
+
+         submit_btn.click(infer, inputs, outputs)
+         model_slider.change(change_model, [model_slider, files], [model_slider, prediction_out])
 
  demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)
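
infer() now exports one .obj mesh per frame and assembles the event-frame video with moviepy's ImageSequenceClip instead of cv2.VideoWriter; the Frame Number slider is 1-indexed, and change_model clamps it to the number of exported meshes before handing files[idx] to the Model3D viewer. A minimal sketch of the create_video path, assuming the event frames are H x W x 3 uint8 arrays as ImageSequenceClip expects:

    import numpy as np
    from moviepy.editor import ImageSequenceClip

    # Ten dummy 64x64 RGB frames written at 25 fps, mirroring create_video() above.
    frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(10)]
    ImageSequenceClip(frames, fps=25).write_videofile("demo.mp4", fps=25)

Note that tempfile._get_candidate_names() is a private CPython helper; tempfile.mkdtemp(dir='temp') would be the public way to get a unique per-request folder.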
infererence.py CHANGED
@@ -1,13 +1,9 @@
- import sys
  import os
  os.environ['ERPC'] = '1'
 
- import esim_py
-
  import torch
  import cv2
  import time
- import pyrender
  import numpy as np
  import trimesh
 
@@ -149,23 +145,6 @@ class Ev2Hands:
          checkpoint = torch.load(save_path, map_location=device)
          net.load_state_dict(checkpoint['state_dict'], strict=True)
 
-         renderer = pyrender.OffscreenRenderer(viewport_width=OUTPUT_WIDTH, viewport_height=OUTPUT_HEIGHT)
-
-         scene = pyrender.Scene(ambient_light=(0.3, 0.3, 0.3))
-         light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
-         light_pose = np.eye(4)
-         light_pose[:3, 3] = np.array([0, -1, 1])
-         scene.add(light, pose=light_pose)
-         light_pose[:3, 3] = np.array([0, 1, 1])
-         scene.add(light, pose=light_pose)
-         light_pose[:3, 3] = np.array([1, 1, 2])
-         scene.add(light, pose=light_pose)
-
-         camera = MAIN_CAMERA
-         nc = pyrender.Node(camera=camera, matrix=np.eye(4))
-         scene.add_node(nc)
-
          rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
 
          mano_hands = net.hands
@@ -173,17 +152,13 @@
          self.net = net
          self.device = device
          self.mano_hands = mano_hands
          self.rot = rot
-         self.renderer = renderer
-         self.scene = scene
 
      def __call__(self, data):
          net = self.net
          device = self.device
          mano_hands = self.mano_hands
          rot = self.rot
-         renderer = self.renderer
-         scene = self.scene
 
          frame = demo(net=net, device=device, data=data)[0]
          seg_mask = frame['seg_mask']
@@ -198,15 +173,5 @@
 
          pred_meshes = trimesh.util.concatenate(pred_meshes)
          pred_meshes.apply_transform(rot)
-
-         mesh_node = pyrender.Node(mesh=pyrender.Mesh.from_trimesh(pred_meshes))
-         scene.add_node(mesh_node)
-         pred_rgb, depth = renderer.render(scene)
-         scene.remove_node(mesh_node)
-
-         pred_rgb = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
-         pred_rgb[pred_rgb == 255] = 0
-
-         return pred_rgb
+
+         return pred_meshes
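
Ev2Hands.__call__ now returns the concatenated trimesh geometry instead of a pyrender rendering, and app.py exports it to .obj for gr.Model3D. A minimal sketch of that export path, using a box as a stand-in for the predicted hands:

    import numpy as np
    import trimesh

    mesh = trimesh.creation.box(extents=(1.0, 1.0, 1.0))  # stand-in for pred_meshes
    rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
    mesh.apply_transform(rot)   # same 180-degree flip applied to the prediction
    mesh.export("0.obj")        # gr.Model3D accepts .obj files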
requirements.txt CHANGED
@@ -2,4 +2,9 @@ opencv-python
  git+https://github.com/hassony2/manopth
  pyrender
  git+https://github.com/mattloper/chumpy.git
- gradio
+ gradio
+ Pillow
+ pydantic
+ git+https://github.com/mmatl/pyopengl.git
+ moviepy
+ tqdm
settings.py CHANGED
@@ -1,5 +1,7 @@
  import os
- if os.name != 'nt': os.environ["PYOPENGL_PLATFORM"] = "egl"
+ if os.name != 'nt':
+     os.environ["PYOPENGL_PLATFORM"] = "egl"
+
 
  import pyrender
  import numpy as np
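
PYOPENGL_PLATFORM has to be set before pyrender (and through it PyOpenGL) is first imported, because the GL platform is chosen at import time; setting it later has no effect. A minimal sketch of the required ordering (EGL here, matching settings.py; the Dockerfile also installs OSMesa as an alternative headless backend):

    import os

    # Must run before the first pyrender / PyOpenGL import to take effect.
    if os.name != 'nt':
        os.environ["PYOPENGL_PLATFORM"] = "egl"

    import pyrender  # imported only after the platform is selected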