File size: 5,902 Bytes
19f8958
 
 
 
036d6f1
19f8958
82ee3e2
 
80eb764
82ee3e2
036d6f1
2081ef8
82ee3e2
80eb764
 
82ee3e2
 
 
 
19f8958
82ee3e2
 
 
 
 
 
dd74532
82ee3e2
f000dbd
 
dd74532
 
 
 
 
68f46ec
dd74532
 
82ee3e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e81f41
82ee3e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d52f0e
82ee3e2
dd74532
 
68f46ec
 
 
 
 
dd74532
82ee3e2
 
adab5b0
82ee3e2
 
 
 
 
 
 
 
 
 
dd74532
82ee3e2
 
80eb764
dd74532
 
80eb764
 
 
 
 
19f8958
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82ee3e2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Global configuration and shared state.
# CSV_PATH / VID_PATH start empty; run_sam_dino() overwrites them with the
# paths returned by the latest detection run and vid_download() reads them.
CSV_PATH = ""
VID_PATH = ""
BATCH_SIZE = 5 # CHANGE THIS TO FIT YOUR GPU, YOU MAY NEED SOME TRIAL AND ERROR
CUDA_PATH = "/usr/local/cuda-12.3/" # CHANGE THIS TO YOUR LOCAL CUDA PATH

import subprocess
import os
import sys

# Point CUDA_HOME at the configurable CUDA_PATH above (previously a duplicated
# literal, so editing CUDA_PATH had no effect). This is set before the
# editable install and the dino_sam import below -- presumably so the
# GroundingDINO build can locate CUDA; TODO confirm.
os.environ["CUDA_HOME"] = CUDA_PATH
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:5000'

# Run pip through the current interpreter so the editable install lands in the
# same environment that is executing this script. The return code is not
# checked, matching the original best-effort behaviour.
subprocess.run([sys.executable, '-m', 'pip', 'install', '-e', 'GroundingDINO'])

# Make the vendored packages importable from the working directory.
sys.path.append(os.path.join(os.getcwd(), "GroundingDINO"))
sys.path.append(os.path.join(os.getcwd(), "segment_anything"))

import gradio as gr
from dino_sam import sam_dino_vid



def run_sam_dino(input_vid,
                 grounding_caption,
                 box_threshold,
                 text_threshold,
                 fps_processed,
                 scaling_factor,
                 video_options):
    """Run detection on a video and remember the resulting output paths.

    The video file is renamed so that its file name contains no spaces, then
    handed to ``sam_dino_vid`` together with the detection settings. The CSV
    and video paths it returns are stored in the module-level ``CSV_PATH`` and
    ``VID_PATH`` so that ``vid_download`` can expose them afterwards.

    Args:
        input_vid: Filesystem path of the video to process.
        grounding_caption: Text prompt, forwarded as ``text_prompt``.
        box_threshold: Forwarded unchanged to ``sam_dino_vid``.
        text_threshold: Forwarded unchanged to ``sam_dino_vid``.
        fps_processed: Forwarded unchanged to ``sam_dino_vid``.
        scaling_factor: Downsample factor; its reciprocal is forwarded.
        video_options: Forwarded unchanged to ``sam_dino_vid``.

    Returns:
        The output video path returned by ``sam_dino_vid``.

    Raises:
        gr.Error: If no input video was supplied.
    """
    global CSV_PATH, VID_PATH

    # Without an uploaded video there is no path to work with; report that to
    # the user instead of failing with an AttributeError on ``None``.
    if not input_vid:
        raise gr.Error("Please upload a video before running detection.")

    # Only sanitise the file name: rewriting spaces in directory components
    # would make os.rename target a directory that does not exist.
    # NOTE(review): spaces in parent directories are left as-is; confirm the
    # downstream pipeline tolerates them.
    directory, filename = os.path.split(input_vid)
    new_input_vid = os.path.join(directory, filename.replace(" ", "_"))
    if new_input_vid != input_vid:
        os.rename(input_vid, new_input_vid)

    csv_path, vid_path = sam_dino_vid(
        vid_path=new_input_vid,
        text_prompt=grounding_caption,
        box_threshold=box_threshold,
        text_threshold=text_threshold,
        fps_processed=fps_processed,
        # The size gets multiplied by this value, so the factor is inverted.
        scaling_factor=(1 / scaling_factor),
        video_options=video_options,
        batch_size=BATCH_SIZE,
    )

    CSV_PATH = csv_path
    VID_PATH = vid_path
    return vid_path

def vid_download():
    """Return the stored CSV and video output paths.

    Reads the module-level ``CSV_PATH`` and ``VID_PATH``, prints both to
    stdout, and returns them as a two-element list ``[CSV_PATH, VID_PATH]``.
    """
    output_paths = [CSV_PATH, VID_PATH]
    print(*output_paths)
    return output_paths


# Build the Gradio interface: inputs and advanced options on the left, the
# processed video and downloadable outputs on the right.
with gr.Blocks() as demo:
    gr.HTML(
        """
            <h1 align="center" style="font-size:xxx-large">🦍 Primate Detection</h1>
        """
    )

    with gr.Row():
        with gr.Column():
            # Named ``input_video`` rather than ``input`` so the builtin
            # ``input`` is not shadowed at module level.
            input_video = gr.Video(label="Input Video", interactive=True)
            grounding_caption = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by periods. i.e. 'baboon . chimpanzee .')")
            with gr.Accordion("Advanced Options", open=False):
                box_threshold = gr.Slider(
                    label="Box Threshold",
                    info="Adjust the threshold to change the sensitivity of the model, lower thresholds being more sensitive.",
                    minimum=0.0,
                    maximum=1.0,
                    value=0.25,
                    step=0.01,
                )
                text_threshold = gr.Slider(
                    label="Text Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    value=0.25,
                    step=0.01,
                )
                fps_processed = gr.Slider(
                    label="Frame Detection Rate",
                    info="Adjust the frame detection rate. I.e. a value of 120 will run detection every 120 frames, a value of 1 will run detection on every frame. Note: the lower the number the slower the processing time.",
                    minimum=1,
                    maximum=120,
                    value=30,
                    step=1,
                )
                scaling_factor = gr.Slider(
                    label="Downsample Factor",
                    info="Adjust the downsample factor. Note: the higher the number the faster the processing time but lower the accuracy.",
                    minimum=1,
                    maximum=5,
                    value=2,
                    step=1,
                )
                video_options = gr.CheckboxGroup(
                    choices=["Bounding boxes", "Masks"],
                    label="Video Output Options",
                    info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
                    value=["Bounding boxes"],
                    interactive=True,
                )

            # TODO: Make button visible only after a file has been uploaded
            run_btn = gr.Button(value="Run Detection", visible=True)
        with gr.Column():
            vid = gr.Video(label="Output Video", height=350, interactive=False, visible=True)
            # download_btn = gr.Button(value="Generate Download", visible=True)
            download_file = gr.Files(label="CSV, Video Output", interactive=False)

    # Single source of truth for the components passed to run_sam_dino; the
    # order must match that function's positional parameters.
    detection_inputs = [
        input_video,
        grounding_caption,
        box_threshold,
        text_threshold,
        fps_processed,
        scaling_factor,
        video_options,
    ]

    run_btn.click(fn=run_sam_dino, inputs=detection_inputs, outputs=[vid])
    # When the output video changes, publish the stored CSV/video paths.
    vid.change(fn=vid_download, outputs=download_file)

    gr.Examples(
        [["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, 1, ["Bounding boxes", "Masks"]]],
        inputs=detection_inputs,
        outputs=[vid],
        fn=run_sam_dino,
        cache_examples=True,
        label='Example',
    )

    gr.DuplicateButton()

    gr.Markdown(
        """
        ## Frequently Asked Questions 
        
        ##### How can I run the interface on my own computer? 
        By clicking on the three dots on the top right corner of the interface, you will be able to clone the repository or run it with a Docker image on your local machine. \
        For local machine setup instructions please check the README file. 

        ##### The video is very slow to process, how can I speed it up?
        You can speed up the processing by adjusting the frame detection rate in the advanced options. The lower the number the slower the processing time. Choosing only\
        bounding boxes will make the processing faster. You can also duplicate the space using the Duplicate Button and choose a different GPU which will make the processing faster.

        """
    )

demo.launch(share=False)