# ev2hands
#
# Running
#
# File size: 5,742 Bytes

import gradio as gr
import os
import cv2
import numpy as np

import esim_py
from infererence import process_events, Ev2Hands
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH, REF_PERIOD


# Scratch directory for the generated videos; infer() writes its two
# output files under "temp/".
os.makedirs("temp", exist_ok=True)
# Single module-level Ev2Hands instance, called once per simulated event
# batch inside infer().
ev2hands = Ev2Hands()


def get_frames(video_in, trim_in):
    """Read the leading frames of a video, resized to the model resolution.

    Args:
        video_in: Video source handed to ``cv2.VideoCapture`` (the caller
            passes the value of the Gradio video input).
        trim_in: Cut-off point in seconds. It is converted to a frame index
            using the frame rate reported by the capture.

    Returns:
        A ``(frames, fps)`` tuple. ``frames`` is the list of frames in read
        order, each resized to ``(OUTPUT_WIDTH, OUTPUT_HEIGHT)``; it holds
        the frames with index ``0 .. int(trim_in * fps)`` (fewer if the
        video is shorter, empty if nothing could be read). ``fps`` is the
        value reported for ``cv2.CAP_PROP_FPS``.
    """
    cap = cv2.VideoCapture(video_in)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        stop_frame = int(trim_in * fps)

        print(f"video fps: {fps}")

        frames = []
        # Keep frame indices 0..stop_frame inclusive. The previous check ran
        # after the append and used ``>``, which let one frame past the cut
        # point through.
        while cap.isOpened() and len(frames) <= stop_frame:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, (OUTPUT_WIDTH, OUTPUT_HEIGHT)))
    finally:
        # Always free the capture handle, even if reading/resizing raised.
        cap.release()

    return frames, fps



def infer(video_inp, trim_in, threshold):
    """Simulate events from a video and run Ev2Hands on every event batch.

    The first frame initialises the event simulator; each following frame
    yields one batch of events, which is turned into an event frame and a
    prediction frame. Both are appended to their own mp4 file.

    Args:
        video_inp: Video source forwarded to ``get_frames``.
        trim_in: Cut-off point in seconds, forwarded to ``get_frames``.
        threshold: Value used for both the positive and the negative
            threshold argument of ``esim_py.EventSimulator``.

    Returns:
        ``(event_frame_vid_path, prediction_vid_path)``: paths of the two
        videos written under ``temp/``.

    Raises:
        gr.Error: If no video was supplied, the frame rate is not positive,
            or fewer than two frames could be read (events need a frame
            pair). Previously these cases surfaced as ZeroDivisionError /
            IndexError or produced empty video files.
    """
    if video_inp is None:
        raise gr.Error("Please provide an input video.")

    frames, fps = get_frames(video_inp, trim_in)
    if not fps > 0 or len(frames) < 2:
        raise gr.Error("Could not read enough frames from the input video.")

    ts_s = 1 / fps
    ts_ns = ts_s * 1e9 # convert s to ns

    POS_THRESHOLD = NEG_THRESHOLD = threshold

    # NOTE(review): the trailing ``1e-4, True`` are presumably the log
    # epsilon and the use-log flag of the simulator - confirm against the
    # esim_py build in use.
    esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)

    event_frame_vid_path = 'temp/event_video.mp4'
    prediction_vid_path = 'temp/prediction_video.mp4'

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    event_video = cv2.VideoWriter(event_frame_vid_path, fourcc, fps, (width, height))
    prediction_video = cv2.VideoWriter(prediction_vid_path, fourcc, fps, (width, height))

    try:
        for idx, frame in enumerate(frames):
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Log intensity of the [0, 1]-scaled image; the epsilon keeps
            # log() finite for black pixels.
            frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)

            current_ts_ns = idx * ts_ns

            if idx == 0:
                # The first frame only seeds the simulator state.
                esim.init(frame_log, current_ts_ns)
                continue

            events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
            data = process_events(events)

            # NOTE(review): both frames are written as-is, so they are
            # assumed to already match the writer's (width, height) and
            # pixel format - confirm in process_events / Ev2Hands.
            prediction_frame = ev2hands(data)
            event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)

            event_video.write(event_frame)
            prediction_video.write(prediction_frame)
    finally:
        # Finalise both files even when simulation or inference fails.
        event_video.release()
        prediction_video.release()

    return event_frame_vid_path, prediction_vid_path
    

# Header HTML shown at the top of the page. The heading and subtitle name
# this demo (Ev2Hands) instead of the "Pix2Pix Video" text they carried
# before, which did not match what the app does.
title = """
    <div style="text-align: center; max-width: 700px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
            Ev2Hands
        </h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Simulate events from a video and run Ev2Hands on them
        </p>
    </div>
"""

# Footer HTML rendered below the demo via gr.HTML(article).
# NOTE(review): the credits and the "You may also like" badge point at
# Instruct Pix2Pix resources, not Ev2Hands - confirm they are still
# intended for this app.
article = """
    
    <div class="footer">
        <p>
        Examples by <a href="https://twitter.com/CitizenPlain" target="_blank">Nathan Shipley</a> •&nbsp;
        Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates 🤗
        </p>
    </div>
    <div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
        <p>You may also like: </p>
        <div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
            
            <svg height="20" width="162" style="margin-left:4px;margin-bottom: 6px;">       
                 <a href="https://huggingface.co/spaces/timbrooks/instruct-pix2pix" target="_blank">
                    <image href="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue" src="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue.png" height="20"/>
                 </a>
            </svg>
            
        </div>
    
    </div>
    
"""

# Page layout: header, then a row with the inputs (video + two sliders) on
# the left and the two result videos plus the run button on the right,
# followed by the footer.
with gr.Blocks(css='style.css') as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        with gr.Row():
            with gr.Column():
                video_inp = gr.Video(label="Video source", elem_id="input-vid")
                with gr.Row():
                    # Seconds of video to process (passed to infer as trim_in).
                    trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
                    # Passed to infer as the event simulator threshold.
                    threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5)

            with gr.Column():
                # NOTE(review): both outputs share elem_id "video-output";
                # element ids are normally unique - confirm style.css
                # depends on this before changing it.
                event_frame_out = gr.Video(label="Event Frame", elem_id="video-output")
                prediction_out = gr.Video(label="Ev2Hands result", elem_id="video-output")

                # NOTE(review): the duplicate-space link targets the
                # fffiloni/Pix2Pix-Video Space - confirm it should not
                # point at this Space instead.
                gr.HTML("""
                <a style="display:inline-block" href="https://huggingface.co/spaces/fffiloni/Pix2Pix-Video?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a> 
                work with longer videos / skip the queue: 
                """, elem_id="duplicate-container")

                submit_btn = gr.Button("Run Ev2Hands")
        
        # Order matches infer's parameters and its returned path pair.
        inputs = [video_inp, trim_in, threshold]
        outputs = [event_frame_out, prediction_out]        
        gr.HTML(article)
    
    # Clicking the button runs infer on the current input values.
    submit_btn.click(infer, inputs, outputs)
        
# Enable request queuing (max_size=12) and serve on all interfaces, port 7860.
demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)