Spaces:

caltech-animal-tracking
/

Primate-Detection-GPU

Sleeping

App Files Community

annading committed on Apr 9

Commit

dd74532

•

1 Parent(s): 19f8958

added video downsampling

Browse files

Files changed (3) hide show

app.py +20 -4
dino_sam.py +2 -1
video_utils.py +2 -2

app.py CHANGED Viewed

@@ -24,10 +24,18 @@ def run_sam_dino(input_vid,
                  box_threshold,
                  text_threshold,
                  fps_processed,
                  video_options):
     new_input_vid = input_vid.replace(" ", "_")
     os.rename(input_vid, new_input_vid)
-    csv_path, vid_path = sam_dino_vid(new_input_vid, grounding_caption, box_threshold, text_threshold, fps_processed, video_options, batch_size=BATCH_SIZE)
     global CSV_PATH
     CSV_PATH = csv_path
     global VID_PATH
@@ -71,6 +79,14 @@ with gr.Blocks() as demo:
                     maximum=120,
                     value=30,
                     step=1)
                 video_options = gr.CheckboxGroup(choices=["Bounding boxes", "Masks"],
                                                  label="Video Output Options",
                                                  info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
@@ -84,12 +100,12 @@ with gr.Blocks() as demo:
             # download_btn = gr.Button(value="Generate Download", visible=True)
             download_file = gr.Files(label="CSV, Video Output", interactive=False)
-    run_btn.click(fn=run_sam_dino, inputs=[input, grounding_caption, box_threshold, text_threshold, fps_processed, video_options], outputs=[vid])
     vid.change(fn=vid_download, outputs=download_file)
     gr.Examples(
-        [["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, ["Bounding boxes", "Masks"]]],
-        inputs = [input, grounding_caption, box_threshold, text_threshold, fps_processed, video_options],
         outputs = [vid],
         fn=run_sam_dino,
         cache_examples=True,

                  box_threshold,
                  text_threshold,
                  fps_processed,
+                 scaling_factor,
                  video_options):
     new_input_vid = input_vid.replace(" ", "_")
     os.rename(input_vid, new_input_vid)
+    csv_path, vid_path = sam_dino_vid(vid_path=new_input_vid,
+                                      text_prompt=grounding_caption,
+                                      box_threshold=box_threshold,
+                                      text_threshold=text_threshold,
+                                      fps_processed=fps_processed,
+                                      scaling_factor=scaling_factor,
+                                      video_options=video_options,
+                                      batch_size=BATCH_SIZE)
     global CSV_PATH
     CSV_PATH = csv_path
     global VID_PATH
                     maximum=120,
                     value=30,
                     step=1)
+                scaling_factor = gr.Slider(
+                    label="Downsample Factor",
+                    info="Adjust the downsample factor. I.e. a value of 0.5 will downsample the pixels in the video by a factor of 2. Note: the higher the number the slower the processing time.",
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.5,
+                    step=0.1
+                )
                 video_options = gr.CheckboxGroup(choices=["Bounding boxes", "Masks"],
                                                  label="Video Output Options",
                                                  info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
             # download_btn = gr.Button(value="Generate Download", visible=True)
             download_file = gr.Files(label="CSV, Video Output", interactive=False)
+    run_btn.click(fn=run_sam_dino, inputs=[input, grounding_caption, box_threshold, text_threshold, fps_processed, scaling_factor, video_options], outputs=[vid])
     vid.change(fn=vid_download, outputs=download_file)
     gr.Examples(
+        [["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, 1, ["Bounding boxes", "Masks"]]],
+        inputs = [input, grounding_caption, box_threshold, text_threshold, fps_processed, scaling_factor, video_options],
         outputs = [vid],
         fn=run_sam_dino,
         cache_examples=True,

dino_sam.py CHANGED Viewed

@@ -33,6 +33,7 @@ def sam_dino_vid(
         box_threshold: float = 0.35,
         text_threshold: float = 0.25,
         fps_processed: int = 1,
         video_options: list[str] = ["Bounding boxes"],
         config_path: str = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
         weights_path: str = "weights/groundingdino_swint_ogc.pth",
@@ -68,7 +69,7 @@ def sam_dino_vid(
     gd_model = load_model(config_path, weights_path, device=device)
     # process video and create a directory of video frames
-    fps = mp4_to_png(vid_path, frames_dir)
     # get the frame paths for the images to process
     frame_filenames = os.listdir(frames_dir)

         box_threshold: float = 0.35,
         text_threshold: float = 0.25,
         fps_processed: int = 1,
+        scaling_factor: float = 1.0,
         video_options: list[str] = ["Bounding boxes"],
         config_path: str = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
         weights_path: str = "weights/groundingdino_swint_ogc.pth",
     gd_model = load_model(config_path, weights_path, device=device)
     # process video and create a directory of video frames
+    fps = mp4_to_png(vid_path, frames_dir, scaling_factor)
     # get the frame paths for the images to process
     frame_filenames = os.listdir(frames_dir)

video_utils.py CHANGED Viewed

@@ -4,7 +4,7 @@ import cv2
 import os
 from tqdm import tqdm
-def mp4_to_png(input_path: str, save_path: str) -> str:
     """ Converts mp4 to pngs for each frame of the video.
         Args: input_path is the path to the mp4 file, save_path is the directory to save the frames.
         Returns: save_path, fps the number of frames per second.
@@ -12,7 +12,7 @@ def mp4_to_png(input_path: str, save_path: str) -> str:
     # get frames per second
     fps = int(cv2.VideoCapture(input_path).get(cv2.CAP_PROP_FPS))
     # run subprocess to convert mp4 to pngs
-    os.system(f"ffmpeg -i {input_path} -vf fps={fps} {save_path}/frame%08d.png")
     return fps
 def frame_to_timestamp(frame_number: int, fps: int):

 import os
 from tqdm import tqdm
+def mp4_to_png(input_path: str, save_path: str, scale_factor: float) -> str:
     """ Converts mp4 to pngs for each frame of the video.
         Args: input_path is the path to the mp4 file, save_path is the directory to save the frames.
         Returns: save_path, fps the number of frames per second.
     # get frames per second
     fps = int(cv2.VideoCapture(input_path).get(cv2.CAP_PROP_FPS))
     # run subprocess to convert mp4 to pngs
+    os.system(f"ffmpeg -i {input_path} -vf 'scale=iw*{scale_factor}:ih*{scale_factor}, fps={fps}' {save_path}/frame%08d.png")
     return fps
 def frame_to_timestamp(frame_number: int, fps: int):