added video downsampling
Browse files
- app.py +20 -4
- dino_sam.py +2 -1
- video_utils.py +2 -2
app.py
CHANGED
@@ -24,10 +24,18 @@ def run_sam_dino(input_vid,
|
|
24 |
box_threshold,
|
25 |
text_threshold,
|
26 |
fps_processed,
|
|
|
27 |
video_options):
|
28 |
new_input_vid = input_vid.replace(" ", "_")
|
29 |
os.rename(input_vid, new_input_vid)
|
30 |
-
csv_path, vid_path = sam_dino_vid(new_input_vid,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
global CSV_PATH
|
32 |
CSV_PATH = csv_path
|
33 |
global VID_PATH
|
@@ -71,6 +79,14 @@ with gr.Blocks() as demo:
|
|
71 |
maximum=120,
|
72 |
value=30,
|
73 |
step=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
video_options = gr.CheckboxGroup(choices=["Bounding boxes", "Masks"],
|
75 |
label="Video Output Options",
|
76 |
info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
|
@@ -84,12 +100,12 @@ with gr.Blocks() as demo:
|
|
84 |
# download_btn = gr.Button(value="Generate Download", visible=True)
|
85 |
download_file = gr.Files(label="CSV, Video Output", interactive=False)
|
86 |
|
87 |
-
run_btn.click(fn=run_sam_dino, inputs=[input, grounding_caption, box_threshold, text_threshold, fps_processed, video_options], outputs=[vid])
|
88 |
vid.change(fn=vid_download, outputs=download_file)
|
89 |
|
90 |
gr.Examples(
|
91 |
-
[["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, ["Bounding boxes", "Masks"]]],
|
92 |
-
inputs = [input, grounding_caption, box_threshold, text_threshold, fps_processed, video_options],
|
93 |
outputs = [vid],
|
94 |
fn=run_sam_dino,
|
95 |
cache_examples=True,
|
|
|
24 |
box_threshold,
|
25 |
text_threshold,
|
26 |
fps_processed,
|
27 |
+
scaling_factor,
|
28 |
video_options):
|
29 |
new_input_vid = input_vid.replace(" ", "_")
|
30 |
os.rename(input_vid, new_input_vid)
|
31 |
+
csv_path, vid_path = sam_dino_vid(vid_path=new_input_vid,
|
32 |
+
text_prompt=grounding_caption,
|
33 |
+
box_threshold=box_threshold,
|
34 |
+
text_threshold=text_threshold,
|
35 |
+
fps_processed=fps_processed,
|
36 |
+
scaling_factor=scaling_factor,
|
37 |
+
video_options=video_options,
|
38 |
+
batch_size=BATCH_SIZE)
|
39 |
global CSV_PATH
|
40 |
CSV_PATH = csv_path
|
41 |
global VID_PATH
|
|
|
79 |
maximum=120,
|
80 |
value=30,
|
81 |
step=1)
|
82 |
+
scaling_factor = gr.Slider(
|
83 |
+
label="Downsample Factor",
|
84 |
+
info="Adjust the downsample factor. I.e. a value of 0.5 will downsample the pixels in the video by a factor of 2. Note: the higher the number the slower the processing time.",
|
85 |
+
minimum=0.1,
|
86 |
+
maximum=1.0,
|
87 |
+
value=0.5,
|
88 |
+
step=0.1
|
89 |
+
)
|
90 |
video_options = gr.CheckboxGroup(choices=["Bounding boxes", "Masks"],
|
91 |
label="Video Output Options",
|
92 |
info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
|
|
|
100 |
# download_btn = gr.Button(value="Generate Download", visible=True)
|
101 |
download_file = gr.Files(label="CSV, Video Output", interactive=False)
|
102 |
|
103 |
+
run_btn.click(fn=run_sam_dino, inputs=[input, grounding_caption, box_threshold, text_threshold, fps_processed, scaling_factor, video_options], outputs=[vid])
|
104 |
vid.change(fn=vid_download, outputs=download_file)
|
105 |
|
106 |
gr.Examples(
|
107 |
+
[["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, 1, ["Bounding boxes", "Masks"]]],
|
108 |
+
inputs = [input, grounding_caption, box_threshold, text_threshold, fps_processed, scaling_factor, video_options],
|
109 |
outputs = [vid],
|
110 |
fn=run_sam_dino,
|
111 |
cache_examples=True,
|
dino_sam.py
CHANGED
@@ -33,6 +33,7 @@ def sam_dino_vid(
|
|
33 |
box_threshold: float = 0.35,
|
34 |
text_threshold: float = 0.25,
|
35 |
fps_processed: int = 1,
|
|
|
36 |
video_options: list[str] = ["Bounding boxes"],
|
37 |
config_path: str = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
|
38 |
weights_path: str = "weights/groundingdino_swint_ogc.pth",
|
@@ -68,7 +69,7 @@ def sam_dino_vid(
|
|
68 |
gd_model = load_model(config_path, weights_path, device=device)
|
69 |
|
70 |
# process video and create a directory of video frames
|
71 |
-
fps = mp4_to_png(vid_path, frames_dir)
|
72 |
|
73 |
# get the frame paths for the images to process
|
74 |
frame_filenames = os.listdir(frames_dir)
|
|
|
33 |
box_threshold: float = 0.35,
|
34 |
text_threshold: float = 0.25,
|
35 |
fps_processed: int = 1,
|
36 |
+
scaling_factor: float = 1.0,
|
37 |
video_options: list[str] = ["Bounding boxes"],
|
38 |
config_path: str = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
|
39 |
weights_path: str = "weights/groundingdino_swint_ogc.pth",
|
|
|
69 |
gd_model = load_model(config_path, weights_path, device=device)
|
70 |
|
71 |
# process video and create a directory of video frames
|
72 |
+
fps = mp4_to_png(vid_path, frames_dir, scaling_factor)
|
73 |
|
74 |
# get the frame paths for the images to process
|
75 |
frame_filenames = os.listdir(frames_dir)
|
video_utils.py
CHANGED
@@ -4,7 +4,7 @@ import cv2
|
|
4 |
import os
|
5 |
from tqdm import tqdm
|
6 |
|
7 |
-
def mp4_to_png(input_path: str, save_path: str) -> str:
|
8 |
""" Converts mp4 to pngs for each frame of the video.
|
9 |
Args: input_path is the path to the mp4 file, save_path is the directory to save the frames.
|
10 |
Returns: save_path, fps the number of frames per second.
|
@@ -12,7 +12,7 @@ def mp4_to_png(input_path: str, save_path: str) -> str:
|
|
12 |
# get frames per second
|
13 |
fps = int(cv2.VideoCapture(input_path).get(cv2.CAP_PROP_FPS))
|
14 |
# run subprocess to convert mp4 to pngs
|
15 |
-
os.system(f"ffmpeg -i {input_path} -vf fps={fps} {save_path}/frame%08d.png")
|
16 |
return fps
|
17 |
|
18 |
def frame_to_timestamp(frame_number: int, fps: int):
|
|
|
4 |
import os
|
5 |
from tqdm import tqdm
|
6 |
|
7 |
+
def mp4_to_png(input_path: str, save_path: str, scale_factor: float) -> str:
|
8 |
""" Converts mp4 to pngs for each frame of the video.
|
9 |
Args: input_path is the path to the mp4 file, save_path is the directory to save the frames.
|
10 |
Returns: save_path, fps the number of frames per second.
|
|
|
12 |
# get frames per second
|
13 |
fps = int(cv2.VideoCapture(input_path).get(cv2.CAP_PROP_FPS))
|
14 |
# run subprocess to convert mp4 to pngs
|
15 |
+
os.system(f"ffmpeg -i {input_path} -vf 'scale=iw*{scale_factor}:ih*{scale_factor}, fps={fps}' {save_path}/frame%08d.png")
|
16 |
return fps
|
17 |
|
18 |
def frame_to_timestamp(frame_number: int, fps: int):
|