File size: 5,742 Bytes
15bc41b
9564652
 
 
15bc41b
9564652
 
 
15bc41b
 
9564652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15bc41b
9564652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15bc41b
9564652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15bc41b
9564652
 
 
 
 
 
 
 
 
15bc41b
9564652
 
 
15bc41b
9564652
 
 
 
15bc41b
9564652
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import os
import cv2
import numpy as np

import esim_py
from infererence import process_events, Ev2Hands
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH, REF_PERIOD


# Directory that infer() writes its output videos into; created at import time.
os.makedirs("temp", exist_ok=True)
# Single module-level Ev2Hands instance; infer() calls it once per event frame.
ev2hands = Ev2Hands()


def get_frames(video_in, trim_in):
    """Read and resize the leading ``trim_in`` seconds of a video.

    Args:
        video_in: Path of the video file, handed to ``cv2.VideoCapture``.
        trim_in: Cut point in seconds. Reading stops once
            ``int(trim_in * fps)`` frames have been collected.

    Returns:
        A ``(frames, fps)`` tuple. ``frames`` is a list of frames resized to
        ``(OUTPUT_WIDTH, OUTPUT_HEIGHT)``; it is empty when nothing could be
        read from the video. ``fps`` is the value reported by
        ``cv2.CAP_PROP_FPS``.
    """
    cap = cv2.VideoCapture(video_in)
    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        stop_frame = int(trim_in * fps)

        print("video fps: " + str(fps))

        # Check the limit *before* reading so that no frame beyond the cut
        # point is decoded, resized or returned.
        while cap.isOpened() and len(frames) < stop_frame:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, (OUTPUT_WIDTH, OUTPUT_HEIGHT)))
    finally:
        # Release the capture even if reading or resizing raises.
        cap.release()

    return frames, fps



def infer(video_inp, trim_in, threshold):
    """Simulate events from a video and run Ev2Hands on each event batch.

    Args:
        video_inp: Path of the input video (value of the Gradio video input).
        trim_in: Number of seconds of video to process.
        threshold: Value used for both the positive and the negative
            threshold of the event simulator.

    Returns:
        A ``(event_video_path, prediction_video_path)`` tuple naming the two
        ``.mp4`` files written under ``temp/``.

    Raises:
        gr.Error: If no video was supplied, or if fewer than two frames (or
            no valid frame rate) could be read from it.
    """
    if not video_inp:
        raise gr.Error("Please provide an input video.")

    frames, fps = get_frames(video_inp, trim_in)

    # The first frame only initialises the simulator, so at least two frames
    # are needed; a non-positive fps would break the timestamp computation.
    if fps <= 0 or len(frames) < 2:
        raise gr.Error("Could not read enough frames from the input video.")

    ts_s = 1 / fps
    ts_ns = ts_s * 1e9  # convert s to ns

    esim = esim_py.EventSimulator(threshold, threshold, REF_PERIOD, 1e-4, True)

    event_frame_vid_path = 'temp/event_video.mp4'
    prediction_vid_path = 'temp/prediction_video.mp4'

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    event_video = prediction_video = None
    try:
        event_video = cv2.VideoWriter(event_frame_vid_path, fourcc, fps, (width, height))
        prediction_video = cv2.VideoWriter(prediction_vid_path, fourcc, fps, (width, height))

        for idx, frame in enumerate(frames):
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Log-intensity image; the 1e-4 offset keeps log() finite for black pixels.
            frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)

            current_ts_ns = idx * ts_ns

            if idx == 0:
                # Nothing to compare against yet: seed the simulator and move on.
                esim.init(frame_log, current_ts_ns)
                continue

            events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
            data = process_events(events)

            prediction_frame = ev2hands(data)
            event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)

            event_video.write(event_frame)
            prediction_video.write(prediction_frame)
    finally:
        # Always finalise the writers, even when simulation or inference fails.
        for writer in (event_video, prediction_video):
            if writer is not None:
                writer.release()

    return event_frame_vid_path, prediction_vid_path
    

# Header HTML rendered at the top of the page via gr.HTML(title).
# The heading names the model this app actually runs (Ev2Hands).
title = """
    <div style="text-align: center; max-width: 700px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
            Ev2Hands
        </h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Simulate events from a video and run Ev2Hands on them
        </p>
    </div>
"""

# Footer HTML rendered below the controls via gr.HTML(article).
# NOTE(review): the credits and the "You may also like" badge refer to
# Instruct Pix2Pix rather than Ev2Hands — confirm they are still intended.
article = """
    
    <div class="footer">
        <p>
        Examples by <a href="https://twitter.com/CitizenPlain" target="_blank">Nathan Shipley</a> •&nbsp;
        Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates 🤗
        </p>
    </div>
    <div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
        <p>You may also like: </p>
        <div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
            
            <svg height="20" width="162" style="margin-left:4px;margin-bottom: 6px;">       
                 <a href="https://huggingface.co/spaces/timbrooks/instruct-pix2pix" target="_blank">
                    <image href="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue" src="https://img.shields.io/badge/🤗 Spaces-Instruct_Pix2Pix-blue.png" height="20"/>
                 </a>
            </svg>
            
        </div>
    
    </div>
    
"""

# Build the Gradio UI. Components are laid out in the order they are created
# inside the nested Row/Column context managers, so statement order matters.
with gr.Blocks(css='style.css') as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        with gr.Row():
            # First column: the source video plus the two tuning sliders.
            with gr.Column():
                video_inp = gr.Video(label="Video source", elem_id="input-vid")
                with gr.Row():
                    trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
                    threshold = gr.Slider(label="Event Threshold", minimum=0.1, maximum=1, step=0.05, value=0.5)

            # Second column: the two result videos, the duplicate badge and the run button.
            with gr.Column():
                # NOTE(review): both output videos share elem_id "video-output";
                # confirm style.css does not rely on the id being unique.
                event_frame_out = gr.Video(label="Event Frame", elem_id="video-output")
                prediction_out = gr.Video(label="Ev2Hands result", elem_id="video-output")

                # NOTE(review): the "Duplicate Space" link below targets
                # fffiloni/Pix2Pix-Video — confirm that is the intended Space.
                gr.HTML("""
                <a style="display:inline-block" href="https://huggingface.co/spaces/fffiloni/Pix2Pix-Video?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a> 
                work with longer videos / skip the queue: 
                """, elem_id="duplicate-container")

                submit_btn = gr.Button("Run Ev2Hands")
        
        # Order matches infer()'s parameters and its returned tuple.
        inputs = [video_inp, trim_in, threshold]
        outputs = [event_frame_out, prediction_out]        
        gr.HTML(article)
    
    # Clicking the button runs infer() on the inputs and fills both output videos.
    submit_btn.click(infer, inputs, outputs)
        
# Enable the request queue (max_size=12) and serve on 0.0.0.0:7860.
demo.queue(max_size=12).launch(server_name="0.0.0.0", server_port=7860)