import os

import cv2
import esim_py
import gradio as gr
import numpy as np

from inference import process_events, Ev2Hands
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH, REF_PERIOD

os.makedirs("temp", exist_ok=True)
ev2hands = Ev2Hands()


def get_frames(video_in, trim_in):
    """Decode a video, resize frames to the model resolution, and stop after trim_in seconds."""
    cap = cv2.VideoCapture(video_in)
    fps = cap.get(cv2.CAP_PROP_FPS)
    stop_frame = int(trim_in * fps)
    print(f"video fps: {fps}")

    frames = []
    i = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (OUTPUT_WIDTH, OUTPUT_HEIGHT))
        frames.append(frame)

        if i > stop_frame:
            break
        i += 1

    cap.release()
    return frames, fps


def infer(video_inp, trim_in, threshold):
    frames, fps = get_frames(video_inp, trim_in)
    ts_s = 1 / fps
    ts_ns = ts_s * 1e9  # convert s to ns

    # Positive and negative contrast thresholds share the slider value.
    POS_THRESHOLD = NEG_THRESHOLD = threshold
    esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)

    is_init = False

    event_frame_vid_path = 'temp/event_video.mp4'
    prediction_vid_path = 'temp/prediction_video.mp4'

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    event_video = cv2.VideoWriter(event_frame_vid_path, fourcc, fps, (width, height))
    prediction_video = cv2.VideoWriter(prediction_vid_path, fourcc, fps, (width, height))

    for idx, frame in enumerate(frames):
        # The event simulator works on log intensity; the epsilon avoids log(0).
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)

        current_ts_ns = idx * ts_ns

        # The first frame only initializes the simulator; it produces no events.
        if not is_init:
            esim.init(frame_log, current_ts_ns)
            is_init = True
            continue

        events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
        data = process_events(events)
        prediction_frame = ev2hands(data)

        event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)
        event_video.write(event_frame)
        prediction_video.write(prediction_frame)

    event_video.release()
    prediction_video.release()

    return event_frame_vid_path, prediction_vid_path


title = """

<h1>Ev2Hands</h1>

<p>Convert a video into simulated event-camera data and estimate 3D hand poses with Ev2Hands</p>

""" article = """

You may also like:

""" with gr.Blocks(css='style.css') as demo: with gr.Column(elem_id="col-container"): gr.HTML(title) with gr.Row(): with gr.Column(): video_inp = gr.Video(label="Video source", elem_id="input-vid") with gr.Row(): trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1) threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5) with gr.Column(): event_frame_out = gr.Video(label="Event Frame", elem_id="video-output") prediction_out = gr.Video(label="Ev2Hands result", elem_id="video-output") gr.HTML(""" Duplicate Space work with longer videos / skip the queue: """, elem_id="duplicate-container") submit_btn = gr.Button("Run Ev2Hands") inputs = [video_inp, trim_in, threshold] outputs = [event_frame_out, prediction_out] gr.HTML(article) submit_btn.click(infer, inputs, outputs) demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)