chris10 committed
Commit d965e49
1 Parent(s): 9564652
Files changed (5)
  1. Dockerfile +8 -7
  2. app.py +94 -69
  3. infererence.py +4 -39
  4. requirements.txt +6 -1
  5. settings.py +3 -1
Dockerfile CHANGED
@@ -4,15 +4,16 @@ ENV DEBIAN_FRONTEND=noninteractive
 
  WORKDIR /code
 
- COPY . .
-
- RUN apt-get update && apt-get install -y python3 python3-pip cmake python3-pybind11 python3-opencv libopencv-dev libboost-all-dev git libglfw3-dev libgles2-mesa-dev
- # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-
- RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
-
- RUN cd esim_py && pip install .
-
- CMD ["python3", "app.py"]
+ RUN apt-get update && apt-get install -y python3 python3-pip cmake python3-pybind11 libeigen3-dev python3-opencv \
+     libopencv-dev libboost-all-dev git libglfw3-dev libosmesa6-dev libgl1-mesa-dev wget mesa-utils vim \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN pip3 install --no-cache-dir torch==2.1.2+cpu torchvision==0.16.2+cpu torchaudio==2.1.2+cpu --index-url https://download.pytorch.org/whl/cpu
+
+ COPY . .
+ RUN pip3 install --no-cache-dir -r requirements.txt
+
+ RUN cd esim_py && pip3 install .
+
+ CMD ["python3", "app.py"]
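
The new Dockerfile pins CPU-only PyTorch wheels and installs dependencies before COPY . ., so source edits no longer invalidate the cached dependency layers. A minimal sanity check inside the built container (a sketch, assuming the pinned wheels installed as written):

    import torch
    import torchvision

    # The Dockerfile pins CPU-only builds, so CUDA should be unavailable.
    print(torch.__version__, torchvision.__version__)  # expected: 2.1.2+cpu 0.16.2+cpu
    print(torch.cuda.is_available())                    # expected: False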
app.py CHANGED
@@ -2,22 +2,30 @@ import gradio as gr
  import os
  import cv2
  import numpy as np
+ from tqdm import tqdm
+ from moviepy.editor import *
+ import tempfile
+
 
  import esim_py
  from infererence import process_events, Ev2Hands
- from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH, REF_PERIOD
+ from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH
 
- os.makedirs("temp", exist_ok=True)
  ev2hands = Ev2Hands()
 
 
+ def create_video(frames, fps, path):
+     clip = ImageSequenceClip(frames, fps=fps)
+     clip.write_videofile(path, fps=fps)
+     return path
+
+
  def get_frames(video_in, trim_in):
      cap = cv2.VideoCapture(video_in)
 
      fps = cap.get(cv2.CAP_PROP_FPS)
      stop_frame = int(trim_in * fps)
 
      print("video fps: " + str(fps))
 
      frames = []
@@ -41,25 +49,47 @@ def get_frames(video_in, trim_in):
      return frames, fps
 
 
+ def change_model(model_slider, files):
+     if files is None:
+         return None, None
+
+     if model_slider >= len(files):
+         model_slider = len(files)
+
+     idx = int(model_slider - 1)
+
+     mesh_path = files[idx]
+
+     return model_slider, mesh_path
+
 
  def infer(video_inp, trim_in, threshold):
+     if video_inp is None:
+         return None, None, None
+
      frames, fps = get_frames(video_inp, trim_in)
      ts_s = 1 / fps
      ts_ns = ts_s * 1e9 # convert s to ns
 
      POS_THRESHOLD = NEG_THRESHOLD = threshold
+     REF_PERIOD = 0
+
+     print(f'Threshold: {threshold}')
 
      esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)
      is_init = False
 
-     event_frame_vid_path = 'temp/event_video.mp4'
-     prediction_vid_path = 'temp/prediction_video.mp4'
-
-     height, width, _ = frames[0].shape
-     event_video = cv2.VideoWriter(event_frame_vid_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
-     prediction_video = cv2.VideoWriter(prediction_vid_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
-
-     for idx, frame in enumerate(frames):
+     temp_folder = f'temp/{next(tempfile._get_candidate_names())}'
+
+     event_frame_vid_path = f'{temp_folder}/event_video.mp4'
+     mesh_folder = f'{temp_folder}/meshes'
+
+     os.makedirs(temp_folder, exist_ok=True)
+     os.makedirs(mesh_folder, exist_ok=True)
+
+     mesh_paths = list()
+     event_frames = list()
+     for idx, frame in enumerate(tqdm(frames)):
          frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
          frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
 
@@ -73,88 +103,83 @@ def infer(video_inp, trim_in, threshold):
          events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
          data = process_events(events)
 
-         prediction_frame = ev2hands(data)
-         event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
-
-         event_video.write(event_frame)
-         prediction_video.write(prediction_frame)
-
-     event_video.release()
-     prediction_video.release()
-
-     return event_frame_vid_path, prediction_vid_path
-
-
- title = """
-     <div style="text-align: center; max-width: 700px; margin: 0 auto;">
-         <div
-             style="
-                 display: inline-flex;
-                 align-items: center;
-                 gap: 0.8rem;
-                 font-size: 1.75rem;
-             "
-         >
-             <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
-                 Pix2Pix Video
-             </h1>
-         </div>
-         <p style="margin-bottom: 10px; font-size: 94%">
-             Apply Instruct Pix2Pix Diffusion to a video
-         </p>
-     </div>
- """
-
- article = """
-     <div class="footer">
-         <p>
-             Examples by <a href="https://twitter.com/CitizenPlain" target="_blank">Nathan Shipley</a> •&nbsp;
-             Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates 🤗
-         </p>
-     </div>
-     <div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
-         <p>You may also like: </p>
-         <div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
-             <svg height="20" width="162" style="margin-left:4px;margin-bottom: 6px;">
-                 <a href="https://huggingface.co/spaces/timbrooks/instruct-pix2pix" target="_blank">
-                     <image href="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue" src="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue.png" height="20"/>
-                 </a>
-             </svg>
-         </div>
-     </div>
- """
+         mesh = ev2hands(data)
+         mesh_path = f'{mesh_folder}/{idx}.obj'
+         mesh.export(mesh_path)
+         mesh_paths.append(mesh_path)
+
+         event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
+         event_frames.append(event_frame)
+
+     create_video(event_frames, fps, event_frame_vid_path)
+
+     return event_frame_vid_path, mesh_paths, mesh_paths[0]
 
 
  with gr.Blocks(css='style.css') as demo:
+     gr.Markdown(
+         """
+         <div align="center">
+             <h1>Ev2Hands: 3D Pose Estimation of Two Interacting Hands from a Monocular Event Camera</h1>
+         </div>
+         """
+     )
+     gr.Markdown(
+         """
+         <p align="center">
+             <a title="Project Page" href="https://4dqv.mpi-inf.mpg.de/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/badge/Project-Website-5B7493?logo=googlechrome&logoColor=5B7493">
+             </a>
+             <a title="arXiv" href="https://arxiv.org/abs/2312.14157" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=b31b1b">
+             </a>
+             <a title="GitHub" href="https://github.com/Chris10M/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/github/stars/Chris10M/Ev2Hands?label=GitHub%20%E2%98%85&&logo=github" alt="badge-github-stars">
+             </a>
+             <a title="Video" href="https://youtu.be/nvES_c5vRfU" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://img.shields.io/badge/YouTube-Video-red?logo=youtube&logoColor=red">
+             </a>
+             <a title="Visitor" href="https://hits.seeyoufarm.com" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                 <img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fchris10%2Fev2hands&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false">
+             </a>
+         </p>
+         """
+     )
 
      with gr.Column(elem_id="col-container"):
-         gr.HTML(title)
+         # gr.HTML(title)
          with gr.Row():
              with gr.Column():
                  video_inp = gr.Video(label="Video source", elem_id="input-vid")
                  with gr.Row():
                      trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
                      threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5)
+
+                 gr.Examples(
+                     examples=[os.path.join(os.path.dirname(__file__), "examples/video.mp4")],
+                     inputs=video_inp,
+                 )
 
              with gr.Column():
                  event_frame_out = gr.Video(label="Event Frame", elem_id="video-output")
-                 prediction_out = gr.Video(label="Ev2Hands result", elem_id="video-output")
+
+                 files = gr.Files(visible=False, label='3D Mesh Files')
+                 prediction_out = gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="Ev2Hands result")
+                 model_slider = gr.Slider(minimum=1, step=1, label="Frame Number")
 
          gr.HTML("""
-             <a style="display:inline-block" href="https://huggingface.co/spaces/fffiloni/Pix2Pix-Video?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
+             <a style="display:inline-block" href="https://huggingface.co/spaces/chris10/ev2hands?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
              work with longer videos / skip the queue:
          """, elem_id="duplicate-container")
 
          submit_btn = gr.Button("Run Ev2Hands")
 
          inputs = [video_inp, trim_in, threshold]
-         outputs = [event_frame_out, prediction_out]
-         gr.HTML(article)
-
-         submit_btn.click(infer, inputs, outputs)
+         outputs = [event_frame_out, files, prediction_out]
+
+         submit_btn.click(infer, inputs, outputs)
+         model_slider.change(change_model, [model_slider, files], [model_slider, prediction_out])
 
  demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)
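
infer() now exports one .obj mesh per frame and assembles the event-frame video with moviepy's ImageSequenceClip instead of cv2.VideoWriter; the Frame Number slider is 1-indexed, and change_model clamps it to the number of exported meshes before handing files[idx] to the Model3D viewer. A minimal sketch of the create_video path, assuming the event frames are H x W x 3 uint8 arrays as ImageSequenceClip expects:

    import numpy as np
    from moviepy.editor import ImageSequenceClip

    # Ten dummy 64x64 RGB frames written at 25 fps, mirroring create_video() above.
    frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(10)]
    ImageSequenceClip(frames, fps=25).write_videofile("demo.mp4", fps=25)

Note that tempfile._get_candidate_names() is a private CPython helper; tempfile.mkdtemp(dir='temp') would be the public way to get a unique per-request folder.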
infererence.py CHANGED
@@ -1,13 +1,9 @@
- import sys
  import os
  os.environ['ERPC'] = '1'
 
- import esim_py
-
  import torch
  import cv2
  import time
- import pyrender
  import numpy as np
  import trimesh
 
@@ -149,23 +145,6 @@ class Ev2Hands:
          checkpoint = torch.load(save_path, map_location=device)
          net.load_state_dict(checkpoint['state_dict'], strict=True)
 
-         renderer = pyrender.OffscreenRenderer(viewport_width=OUTPUT_WIDTH, viewport_height=OUTPUT_HEIGHT)
-
-         scene = pyrender.Scene(ambient_light=(0.3, 0.3, 0.3))
-         light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
-         light_pose = np.eye(4)
-         light_pose[:3, 3] = np.array([0, -1, 1])
-         scene.add(light, pose=light_pose)
-         light_pose[:3, 3] = np.array([0, 1, 1])
-         scene.add(light, pose=light_pose)
-         light_pose[:3, 3] = np.array([1, 1, 2])
-         scene.add(light, pose=light_pose)
-
-         camera = MAIN_CAMERA
-         nc = pyrender.Node(camera=camera, matrix=np.eye(4))
-         scene.add_node(nc)
-
          rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
 
          mano_hands = net.hands
@@ -173,17 +152,13 @@
          self.net = net
          self.device = device
          self.mano_hands = mano_hands
          self.rot = rot
-         self.renderer = renderer
-         self.scene = scene
 
      def __call__(self, data):
          net = self.net
          device = self.device
          mano_hands = self.mano_hands
          rot = self.rot
-         renderer = self.renderer
-         scene = self.scene
 
          frame = demo(net=net, device=device, data=data)[0]
          seg_mask = frame['seg_mask']
@@ -198,15 +173,5 @@
 
          pred_meshes = trimesh.util.concatenate(pred_meshes)
          pred_meshes.apply_transform(rot)
-
-         mesh_node = pyrender.Node(mesh=pyrender.Mesh.from_trimesh(pred_meshes))
-         scene.add_node(mesh_node)
-         pred_rgb, depth = renderer.render(scene)
-         scene.remove_node(mesh_node)
-
-         pred_rgb = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
-         pred_rgb[pred_rgb == 255] = 0
-
-         return pred_rgb
+
+         return pred_meshes
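
Ev2Hands.__call__ now returns the concatenated trimesh geometry instead of a pyrender rendering, and app.py exports it to .obj for gr.Model3D. A minimal sketch of that export path, using a box as a stand-in for the predicted hands:

    import numpy as np
    import trimesh

    mesh = trimesh.creation.box(extents=(1.0, 1.0, 1.0))  # stand-in for pred_meshes
    rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
    mesh.apply_transform(rot)   # same 180-degree flip applied to the prediction
    mesh.export("0.obj")        # gr.Model3D accepts .obj files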
requirements.txt CHANGED
@@ -2,4 +2,9 @@ opencv-python
  git+https://github.com/hassony2/manopth
  pyrender
  git+https://github.com/mattloper/chumpy.git
- gradio
+ gradio
+ Pillow
+ pydantic
+ git+https://github.com/mmatl/pyopengl.git
+ moviepy
+ tqdm
settings.py CHANGED
@@ -1,5 +1,7 @@
  import os
- if os.name != 'nt': os.environ["PYOPENGL_PLATFORM"] = "egl"
+ if os.name != 'nt':
+     os.environ["PYOPENGL_PLATFORM"] = "egl"
+
 
  import pyrender
  import numpy as np
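
PYOPENGL_PLATFORM has to be set before pyrender (and through it PyOpenGL) is first imported, because the GL platform is chosen at import time; setting it later has no effect. A minimal sketch of the required ordering (EGL here, matching settings.py; the Dockerfile also installs OSMesa as an alternative headless backend):

    import os

    # Must run before the first pyrender / PyOpenGL import to take effect.
    if os.name != 'nt':
        os.environ["PYOPENGL_PLATFORM"] = "egl"

    import pyrender  # imported only after the platform is selected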