File size: 2,369 Bytes
9fd9702
68916f7
 
9fd9702
 
d9e93e5
9fd9702
 
436302d
9fd9702
6d2edfa
436302d
 
 
 
 
 
9fd9702
 
6d2edfa
9fd9702
 
 
 
 
a74b843
436302d
 
 
 
9fd9702
 
 
ad9569f
436302d
 
 
 
 
 
 
 
 
 
 
 
9fd9702
 
 
 
5c2dd9a
a74b843
6d2edfa
9fd9702
 
 
 
 
436302d
9fd9702
 
c327af5
 
9fd9702
 
 
074e144
c327af5
9fd9702
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Importing the requirements
import warnings
warnings.filterwarnings("ignore")

import gradio as gr
from src.app.response import describe_video


# Input widgets, declared in the same order they are handed to the interface:
# the uploaded clip, the free-text question, then the four generation sliders.
video = gr.Video(label="Video")
query = gr.Textbox(
    placeholder="Enter your question here",
    label="Question",
)
temperature = gr.Slider(
    label="Temperature",
    value=0.7,
    minimum=0.01,
    maximum=1.99,
    step=0.01,
)
top_p = gr.Slider(
    label="Top P",
    value=0.8,
    minimum=0,
    maximum=1,
    step=0.01,
)
top_k = gr.Slider(
    label="Top K",
    value=100,
    minimum=0,
    maximum=1000,
    step=1,
)
max_new_tokens = gr.Slider(
    label="Max Tokens",
    value=512,
    minimum=1,
    maximum=4096,
    step=1,
)

# Output widget: a labelled text box with a copy button for the answer
response = gr.Textbox(
    label="Predicted answer",
    show_label=True,
    show_copy_button=True,
)

# Example rows for the interface. Each row is
# [video path, question, temperature, top_p, top_k, max_new_tokens]; every
# example uses the same slider defaults, so only the clip and prompt vary.
examples = [
    [clip, prompt, 0.7, 0.8, 100, 512]
    for clip, prompt in (
        (
            "./videos/sample_video_1.mp4",
            "Here are some frames of a video. Describe this video.",
        ),
        (
            "./videos/sample_video_2.mp4",
            "¿Cuál es el animal de este vídeo? ¿Cuantos animales hay?",
        ),
        (
            "./videos/sample_video_3.mp4",
            "Que se passe-t-il dans cette vidéo ?",
        ),
    )
]

# Static page copy: heading, introductory blurb, and the HTML footer with
# links to the model's repository and model page.
title = "Video Question Answering"
description = (
    "Gradio Demo for the MiniCPM-V 2.6 Vision Language Understanding and Generation model. "
    "This model can answer questions about videos in natural language. "
    "To use it, upload your video, type a question, select associated parameters, "
    "use the default values, click 'Submit', or click one of the examples to load them. "
    "You can read more at the links below."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a>"
    " | "
    "<a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a>"
    "</p>"
)


# Build the interface around describe_video and start serving it.
interface = gr.Interface(
    # Callback and the widgets wired to it
    fn=describe_video,
    inputs=[video, query, temperature, top_p, top_k, max_new_tokens],
    outputs=response,
    # Page copy and appearance
    title=title,
    description=description,
    article=article,
    theme="ParityError/Anime",
    # Example rows, cached with the "lazy" cache mode
    examples=examples,
    cache_examples=True,
    cache_mode="lazy",
    # Flagging is turned off
    flagging_mode="never",
)
interface.launch(debug=False)