Spaces:

facebook
/

cotracker

Running on A10G

File size: 3,941 Bytes

2aba93c
 
 
 
 
 
 
 
 
d6d3990
2aba93c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6d3990
 
 
 
2aba93c
 
 
d6d3990
2aba93c
 
 
 
d6d3990
6393e70
d6d3990
 
 
 
 
2aba93c
 
 
 
 
d6d3990
2aba93c
 
 
 
 
 
 
d6d3990
2aba93c
 
 
 
 
 
 
 
 
d6d3990
2aba93c
d6d3990
 
2aba93c
 
 
 
d6d3990
2aba93c
 
d6d3990
 
 
2aba93c
 
 
d6d3990
 
2aba93c
 
 
 
 
 
 
 
 
 
 
 
d515d68
2aba93c
 
 
 
6393e70
 
 
 
 
2aba93c
737c008
2aba93c
 
737c008

import os
import cv2
import imutils
import torch
import numpy as np
import gradio as gr

from cotracker.utils.visualizer import Visualizer


def parse_video(video_file):
    """Decode every readable frame of a video into a single RGB array.

    Args:
        video_file: Source handed directly to ``cv2.VideoCapture`` (in this
            app, the path of the uploaded or example video).

    Returns:
        The decoded frames, converted from BGR to RGB and combined with
        ``np.stack`` along a new leading axis, in reading order.

    Raises:
        ValueError: If not a single frame could be read from ``video_file``
            (missing file, unsupported codec, empty video, ...).
    """
    capture = cv2.VideoCapture(video_file)
    frames = []
    try:
        while True:
            gotit, frame = capture.read()
            # Keep any frame that was delivered, even on the final read,
            # and only then honour the end-of-stream flag.
            if frame is not None:
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            if not gotit:
                break
    finally:
        # Always free the underlying decoder / file handle, including when
        # a colour conversion raises part-way through the video.
        capture.release()

    if not frames:
        # np.stack([]) would raise a ValueError as well, but with a message
        # that says nothing about the video being unreadable.
        raise ValueError(f"No frames could be read from video: {video_file!r}")

    return np.stack(frames)


def cotracker_demo(
    input_video,
    grid_size: int = 10,
    tracks_leave_trace: bool = False,
):
    """Track a grid of points through a video and render the tracks.

    The video is decoded with ``parse_video``, fed to the
    ``cotracker2_online`` model from ``torch.hub`` in overlapping chunks of
    ``2 * model.step`` frames, and the tracks returned by the last chunk are
    drawn with ``Visualizer``.

    Args:
        input_video: Video source passed on to ``parse_video``.
        grid_size: Forwarded to the model's first step as ``grid_size``; also
            selects the line width used for drawing (thicker lines for
            sparser grids).
        tracks_leave_trace: When true, ``Visualizer`` is created with
            ``tracks_leave_trace=-1``, otherwise ``0``.

    Returns:
        Path ``<this file's directory>/results/<ms timestamp>_pred_track.mp4``,
        where the visualizer is expected to have written the rendered video.

    Raises:
        gr.Error: If the video has too few frames for the model to produce
            any prediction.
        ValueError: Propagated from ``parse_video`` when no frame can be read.
    """
    import time

    frames = parse_video(input_video)
    # Stacked frames are permuted to channels-first and given a leading batch
    # axis; dimension 1 is therefore the frame index used for chunking below.
    load_video = torch.from_numpy(frames).permute(0, 3, 1, 2)[None].float()

    # NOTE(review): the model is loaded on every request. It is deliberately
    # not cached here because the online predictor may keep per-video state
    # and the app serves requests concurrently - confirm before sharing it.
    model = torch.hub.load("facebookresearch/co-tracker", "cotracker2_online")

    if torch.cuda.is_available():
        model = model.cuda()
        load_video = load_video.cuda()

    step = model.step
    num_frames = load_video.shape[1]
    if num_frames <= step:
        # The chunk loop below would not run a single iteration, leaving the
        # predictions undefined; fail with a message the user can act on.
        raise gr.Error(
            f"The video is too short: it has {num_frames} frame(s), "
            f"but more than {step} are required."
        )

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        model(video_chunk=load_video, is_first_step=True, grid_size=grid_size)
        # Slide a window of 2 * step frames forward by step frames at a time;
        # the predictions of the final call are the ones that get visualized.
        for ind in range(0, num_frames - step, step):
            pred_tracks, pred_visibility = model(
                video_chunk=load_video[:, ind : ind + step * 2]
            )  # B T N 2,  B T N 1

    # Sparser grids get thicker lines so individual tracks stay visible.
    if grid_size < 10:
        linewidth = 4
    elif grid_size < 20:
        linewidth = 3
    else:
        linewidth = 2

    results_dir = os.path.join(os.path.dirname(__file__), "results")
    vis = Visualizer(
        save_dir=results_dir,
        grayscale=False,
        pad_value=100,
        fps=10,
        linewidth=linewidth,
        show_first_frame=5,
        tracks_leave_trace=-1 if tracks_leave_trace else 0,
    )

    # Millisecond timestamp keeps output names distinct between requests.
    filename = str(round(time.time() * 1000))
    vis.visualize(
        load_video.cpu(),
        tracks=pred_tracks.cpu(),
        visibility=pred_visibility.cpu(),
        filename=f"{filename}_pred_track",
    )
    return os.path.join(results_dir, f"{filename}_pred_track.mp4")


# Example clips shipped with the demo live in the "videos" folder that sits
# next to this file.
_videos_dir = os.path.join(os.path.dirname(__file__), "videos")
apple, bear, paragliding_launch, paragliding = (
    os.path.join(_videos_dir, clip_name)
    for clip_name in (
        "apple.mp4",
        "bear.mp4",
        "paragliding-launch.mp4",
        "paragliding.mp4",
    )
)

# Gradio UI: wires `cotracker_demo` to a video upload, a grid-size slider and
# a trace checkbox, and shows the rendered video it returns.
app = gr.Interface(
    title="🎨 CoTracker: It is Better to Track Together",
    description="<div style='text-align: left;'> \
    <p>Welcome to <a href='http://co-tracker.github.io' target='_blank'>CoTracker</a>! This space demonstrates point (pixel) tracking in videos. \
    Points are sampled on a regular grid and are tracked jointly. </p> \
    <p> To get started, simply upload your <b>.mp4</b> video in landscape orientation or click on one of the example videos to load them. The shorter the video, the faster the processing. We recommend submitting short videos of length <b>2-7 seconds</b>.</p> \
    <ul style='display: inline-block; text-align: left;'> \
        <li>The total number of grid points is the square of <b>Grid Size</b>.</li> \
        <li>Check <b>Visualize Track Traces</b> to visualize traces of all the tracked points. </li> \
    </ul> \
    <p style='text-align: left'>For more details, check out our <a href='https://github.com/facebookresearch/co-tracker' target='_blank'>GitHub Repo</a> ⭐</p> \
    </div>",
    fn=cotracker_demo,
    # Inputs are listed in the same order as the parameters of
    # `cotracker_demo`: input_video, grid_size, tracks_leave_trace.
    inputs=[
        # NOTE(review): `type="file"` is Gradio-version-specific - confirm it
        # is still accepted by the pinned Gradio release.
        gr.Video(type="file", label="Input video", interactive=True),
        # The slider starts at 10, so the UI never sends a grid size below 10.
        gr.Slider(minimum=10, maximum=80, step=1, value=10, label="Grid Size"),
        gr.Checkbox(label="Visualize Track Traces"),
    ],
    outputs=gr.Video(label="Video with predicted tracks"),
    # Each example row is [video path, grid size, visualize traces].
    examples=[
        [apple, 30, False],
        [apple, 10, True],
        [bear, 10, False],
        [paragliding, 10, False],
        [paragliding_launch, 10, False],
    ],
    # NOTE(review): with cache_examples=True Gradio is expected to run `fn` on
    # the examples up front and serve the stored outputs - confirm against
    # the Gradio docs for the pinned version.
    cache_examples=True,
    # NOTE(review): newer Gradio releases document string values for
    # `allow_flagging` (e.g. "never"); verify the boolean is still supported.
    allow_flagging=False,
)
# Runs at import time (no `__main__` guard): enable the request queue with at
# most 20 queued jobs, then start the server in debug mode.
# NOTE(review): `concurrency_count` is Gradio-version-specific - confirm.
app.queue(max_size=20, concurrency_count=2).launch(debug=True)