File size: 2,419 Bytes
f1dd031
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import os
import tempfile
import subprocess

# Define the function to call the command line script
def process_video(uploaded_video_path, texts):
    """Run the text-prompted tracking demo script on a video and return the result.

    The video is processed by ``demo/video_demo_with_text.py`` in a child
    process, then re-encoded with ``ffmpeg`` (MPEG-4 video, AAC audio,
    ``+faststart``) before being handed back.

    Args:
        uploaded_video_path: Filesystem path of the input video.
        texts: Value forwarded unchanged to the script's ``--texts`` option.

    Returns:
        Path of the re-encoded output video (a ``*_converted.mp4`` file next
        to the temporary output file).

    Raises:
        ValueError: If no input video path was supplied.
        subprocess.CalledProcessError: If the tracking script or ``ffmpeg``
            exits with a non-zero status.
    """
    # Fail early with a clear message instead of creating a temp file and
    # then crashing inside subprocess with an opaque TypeError.
    if not uploaded_video_path:
        raise ValueError("No input video was provided.")

    # Reserve a unique output filename. The handle is closed right away so
    # the child process can write to the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        output_video_path = tmpfile.name

    # Split off only the final extension: str.replace('.mp4', ...) would
    # rewrite every '.mp4' occurrence, including one in a directory name.
    stem, ext = os.path.splitext(output_video_path)
    converted_output_path = f"{stem}_converted{ext}"

    command = [
        "python", "demo/video_demo_with_text.py", uploaded_video_path,
        "--out", output_video_path,
        "--masa_config", "configs/masa-gdino/masa_gdino_swinb_inference.py",
        "--masa_checkpoint", "saved_models/masa_models/gdino_masa.pth",
        "--texts", texts,
        "--score-thr", "0.2",
        "--unified",
        "--show_fps"
    ]

    # Ensure the video is in a compatible format using ffmpeg.
    # "-y" keeps ffmpeg from waiting on an overwrite prompt.
    ffmpeg_command = [
        "ffmpeg", "-y", "-i", output_video_path, "-c:v", "mpeg4",
        "-c:a", "aac", "-b:a", "128k", "-movflags", "+faststart",
        converted_output_path
    ]

    try:
        subprocess.run(command, check=True)
        subprocess.run(ffmpeg_command, check=True)
    except BaseException:
        # Do not leave a partially written result behind on failure.
        _remove_quietly(converted_output_path)
        raise
    finally:
        # The intermediate file is only an input to ffmpeg; it was created
        # with delete=False, so it must be removed explicitly.
        _remove_quietly(output_video_path)

    return converted_output_path


def _remove_quietly(path):
    """Delete *path* on a best-effort basis, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the real outcome of the processing run.
        pass

# Markdown heading rendered at the top of the page.
title = "# MASA Track Everything Demo"

# Markdown blurb shown under the heading; written as adjacent literals so
# each link sits on its own source line (the resulting string is unchanged).
description = (
    " MASA + GroundingDINO on your video files!\n"
    "Please refer to our [paper](https://arxiv.org/abs/2406.04221), "
    "[project page](https://matchinganything.github.io/), "
    "or [github](https://github.com/siyuanliii/masa/tree/main?tab=readme-ov-file) "
    "for more details."
)

# Custom stylesheet passed to gr.Blocks: caps the height of the three
# "img-display-*" element ids relative to the viewport.
css = """
#img-display-container {
    max-height: 100vh;
    }
#img-display-input {
    max-height: 80vh;
    }
#img-display-output {
    max-height: 80vh;
    }
"""

# Build the page: header text, an input row (video + text prompt), a submit
# button, the output player, and a list of example videos.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Video Object Tracking demo")

    with gr.Row():
        input_video = gr.Video(label="Input Video")
        input_texts = gr.Textbox(label="Input Texts")

    submit = gr.Button("Submit")
    processed_video = gr.Video(label="Processed Video")

    # Clicking the button runs process_video on the two inputs and shows the
    # returned file in the output player.
    submit.click(
        process_video,
        inputs=[input_video, input_texts],
        outputs=processed_video,
    )

    # Offer every file in the examples directory, in sorted filename order.
    examples_dir = 'assets/examples_video'
    example_files = [
        os.path.join(examples_dir, filename)
        for filename in sorted(os.listdir(examples_dir))
    ]
    examples = gr.Examples(
        examples=example_files,
        inputs=[input_video, input_texts],
        outputs=processed_video,
        fn=process_video,
        cache_examples=True,
    )

# Start the app (with queuing enabled) only when run as a script.
if __name__ == "__main__":
    demo.queue().launch()