# ev2hands / app.py
# Christen Millerdurai
# fixed typo and added documentation
# ea732b1
import gradio as gr
import os
import cv2
import numpy as np
from tqdm import tqdm
from moviepy.editor import *
import tempfile
import esim_py
from infererence import process_events, Ev2Hands
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH
# Single Ev2Hands instance created at import time; infer() calls it once for
# every simulated event frame.
ev2hands = Ev2Hands()
def create_video(frames, fps, path):
    """Write ``frames`` to a video file at ``path`` and return ``path``.

    The frames are wrapped in an ``ImageSequenceClip`` running at ``fps`` and
    written out with ``write_videofile`` at that same rate.
    """
    ImageSequenceClip(frames, fps=fps).write_videofile(path, fps=fps)
    return path
def get_frames(video_in, trim_in):
    """Decode and resize the leading frames of a video.

    Args:
        video_in: Video source handed to ``cv2.VideoCapture``.
        trim_in: Cut-off position in seconds; multiplied by the reported
            frame rate to obtain the frame index at which reading stops.

    Returns:
        A ``(frames, fps)`` tuple: the decoded frames, each resized to
        ``(OUTPUT_WIDTH, OUTPUT_HEIGHT)``, and the frame rate reported by the
        capture. ``frames`` is empty when nothing could be read.
    """
    cap = cv2.VideoCapture(video_in)
    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        stop_frame = int(trim_in * fps)
        print("video fps: " + str(fps))

        i = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, (OUTPUT_WIDTH, OUTPUT_HEIGHT)))
            # The check runs after the append, so reading stops once the index
            # has passed stop_frame (at most stop_frame + 2 frames are kept).
            if i > stop_frame:
                break
            i += 1
    finally:
        # Release the capture even if decoding or resizing raised.
        cap.release()

    return frames, fps
def change_model(model_slider, eventframe_files, mesh_files):
    """Pick the event frame and mesh that belong to a 1-based slider position.

    Args:
        model_slider: Requested frame number, counted from 1.
        eventframe_files: Sequence of event-frame entries, or ``None``.
        mesh_files: Sequence of mesh entries, or ``None``.

    Returns:
        ``(model_slider, event_frame_entry, mesh_entry)`` where
        ``model_slider`` has been clamped to the available range, or
        ``(None, None, None)`` when either sequence is missing or empty.
    """
    # Nothing to show yet (no inference run, or it produced no results).
    if not mesh_files or not eventframe_files:
        return None, None, None

    # Clamp into [1, last] so an out-of-range position can neither raise an
    # IndexError nor wrap around to the end through a negative index.
    last = min(len(mesh_files), len(eventframe_files))
    if model_slider >= last:
        model_slider = last
    elif model_slider < 1:
        model_slider = 1

    idx = int(model_slider - 1)  # slider is 1-based, sequences are 0-based
    return model_slider, eventframe_files[idx], mesh_files[idx]
def infer(video_inp, trim_in, threshold):
    """Turn a video into simulated events and predict one mesh per frame.

    Frames are read with ``get_frames``, converted to log-intensity images and
    fed to an ``esim_py.EventSimulator``. The first frame only initialises the
    simulator; every later frame yields events that go through
    ``process_events`` and ``ev2hands``. Each resulting mesh is exported as
    ``<idx>.obj`` and each event frame is written as ``<idx>.jpg`` inside a
    freshly created folder under ``temp/``.

    Args:
        video_inp: Video source, or ``None`` when nothing was provided.
        trim_in: Cut-off position in seconds, forwarded to ``get_frames``.
        threshold: Value used for both the positive and the negative
            threshold of the event simulator.

    Returns:
        ``(event_frame_paths, first_event_frame_path, mesh_paths,
        first_mesh_path)``, or four ``None`` values when there is no input or
        the video did not yield at least two frames at a positive frame rate.
    """
    if video_inp is None:
        return None, None, None, None

    frames, fps = get_frames(video_inp, trim_in)

    # The first frame is consumed by the simulator initialisation, so fewer
    # than two frames (or an unusable frame rate) cannot produce any result.
    # Bail out instead of failing on ``1 / fps`` or on indexing an empty list.
    if len(frames) < 2 or fps <= 0:
        return None, None, None, None

    ts_ns = (1 / fps) * 1e9  # time between frames: convert s to ns

    POS_THRESHOLD = NEG_THRESHOLD = threshold
    REF_PERIOD = 0

    print(f'Threshold: {threshold}')
    esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)

    # mkdtemp creates a uniquely named folder through the public tempfile API.
    os.makedirs('temp', exist_ok=True)
    temp_folder = tempfile.mkdtemp(dir='temp')
    event_frame_folder = f'{temp_folder}/event_frames'
    mesh_folder = f'{temp_folder}/meshes'
    os.makedirs(event_frame_folder, exist_ok=True)
    os.makedirs(mesh_folder, exist_ok=True)

    mesh_paths = []
    event_frames = []
    for idx, frame in enumerate(tqdm(frames)):
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Small offset keeps the logarithm finite for black pixels.
        frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
        current_ts_ns = idx * ts_ns

        if idx == 0:
            # The first frame only seeds the simulator; no events come out yet.
            esim.init(frame_log, current_ts_ns)
            continue

        events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
        data = process_events(events)

        mesh = ev2hands(data)
        mesh_path = f'{mesh_folder}/{idx}.obj'
        mesh.export(mesh_path)
        mesh_paths.append(mesh_path)

        event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
        event_frame_path = f'{event_frame_folder}/{idx}.jpg'
        cv2.imwrite(event_frame_path, event_frame)
        event_frames.append(event_frame_path)

    return event_frames, event_frames[0], mesh_paths, mesh_paths[0]
# Gradio UI definition. Components are created in display order inside nested
# layout contexts.
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm the Row/Column boundaries against the intended layout.
with gr.Blocks(css='style.css') as demo:
    # Page title.
    gr.Markdown(
        """
<div align="center">
<h1>Ev2Hands: 3D Pose Estimation of Two Interacting Hands from a Monocular Event Camera</h1>
</div>
"""
    )
    # Disclaimer about feeding a video-derived event stream to the model.
    gr.Markdown(
        """
<div align="center">
<h4>
Note: The model's performance may be suboptimal as the event stream derived from the input video inadequately reflects the characteristics of an event stream generated by an event camera. 🚫📹
</h4>
</div>
"""
    )
    # Badge links: project page, arXiv, GitHub, YouTube and a visitor counter.
    gr.Markdown(
        """
<p align="center">
<a title="Project Page" href="https://4dqv.mpi-inf.mpg.de/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
<img src="https://img.shields.io/badge/Project-Website-5B7493?logo=googlechrome&logoColor=5B7493">
</a>
<a title="arXiv" href="https://arxiv.org/abs/2312.14157" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
<img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=b31b1b">
</a>
<a title="GitHub" href="https://github.com/Chris10M/Ev2Hands/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
<img src="https://img.shields.io/github/stars/Chris10M/Ev2Hands?label=GitHub%20%E2%98%85&&logo=github" alt="badge-github-stars">
</a>
<a title="Video" href="https://youtu.be/nvES_c5vRfU" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
<img src="https://img.shields.io/badge/YouTube-Video-red?logo=youtube&logoColor=red">
</a>
<a title="Visitor" href="https://hits.seeyoufarm.com" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
<img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fchris10%2Fev2hands&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false">
</a>
</p>
"""
    )
    with gr.Column(elem_id="col-container"):
        # gr.HTML(title)
        with gr.Row():
            # Input side: source video, trim length, event threshold, example.
            with gr.Column():
                gr.Markdown("<h3>Input: RGB video. We convert the video into an event stream.✨📹</h3>")
                video_inp = gr.Video(label="Video source", elem_id="input-vid")
                with gr.Row():
                    trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
                    threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.8)
                gr.Examples(
                    examples=[os.path.join(os.path.dirname(__file__), "examples/video.mp4")],
                    inputs=video_inp,
                )
            # Output side.
            with gr.Column():
                # Hidden components holding the full path lists returned by
                # infer(); change_model() indexes into them.
                eventframe_files = gr.Files(visible=False, label='Event frame paths')
                mesh_files = gr.Files(visible=False, label='3D Mesh Files')
                # Visible viewers for the currently selected frame.
                event_frame = gr.Image(label="Event Frame")
                prediction_out = gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="Ev2Hands Result")
                # 1-based frame selector; no maximum is set here.
                model_slider = gr.Slider(minimum=1, step=1, label="Frame Number")
                gr.HTML("""
<a style="display:inline-block" href="https://huggingface.co/spaces/chris10/ev2hands?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
work with longer videos / skip the queue:
""", elem_id="duplicate-container")
        submit_btn = gr.Button("Run Ev2Hands")
        inputs = [video_inp, trim_in, threshold]
        outputs = [eventframe_files, event_frame, mesh_files, prediction_out]
        # Clicking the button runs infer() on the three inputs and fills the
        # hidden path lists plus the two viewers.
        submit_btn.click(infer, inputs, outputs)
        # Moving the slider runs change_model(), which writes back to the
        # slider and swaps the displayed event frame and mesh.
        model_slider.change(change_model, [model_slider, eventframe_files, mesh_files], [model_slider, event_frame, prediction_out])
# Enable the request queue (max_size=12) and serve on 0.0.0.0:7860.
demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)