annading committed on
Commit
dd74532
1 Parent(s): 19f8958

added video downsampling

Browse files
Files changed (3) hide show
  1. app.py +20 -4
  2. dino_sam.py +2 -1
  3. video_utils.py +2 -2
app.py CHANGED
@@ -24,10 +24,18 @@ def run_sam_dino(input_vid,
24
  box_threshold,
25
  text_threshold,
26
  fps_processed,
 
27
  video_options):
28
  new_input_vid = input_vid.replace(" ", "_")
29
  os.rename(input_vid, new_input_vid)
30
- csv_path, vid_path = sam_dino_vid(new_input_vid, grounding_caption, box_threshold, text_threshold, fps_processed, video_options, batch_size=BATCH_SIZE)
 
 
 
 
 
 
 
31
  global CSV_PATH
32
  CSV_PATH = csv_path
33
  global VID_PATH
@@ -71,6 +79,14 @@ with gr.Blocks() as demo:
71
  maximum=120,
72
  value=30,
73
  step=1)
 
 
 
 
 
 
 
 
74
  video_options = gr.CheckboxGroup(choices=["Bounding boxes", "Masks"],
75
  label="Video Output Options",
76
  info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
@@ -84,12 +100,12 @@ with gr.Blocks() as demo:
84
  # download_btn = gr.Button(value="Generate Download", visible=True)
85
  download_file = gr.Files(label="CSV, Video Output", interactive=False)
86
 
87
- run_btn.click(fn=run_sam_dino, inputs=[input, grounding_caption, box_threshold, text_threshold, fps_processed, video_options], outputs=[vid])
88
  vid.change(fn=vid_download, outputs=download_file)
89
 
90
  gr.Examples(
91
- [["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, ["Bounding boxes", "Masks"]]],
92
- inputs = [input, grounding_caption, box_threshold, text_threshold, fps_processed, video_options],
93
  outputs = [vid],
94
  fn=run_sam_dino,
95
  cache_examples=True,
 
24
  box_threshold,
25
  text_threshold,
26
  fps_processed,
27
+ scaling_factor,
28
  video_options):
29
  new_input_vid = input_vid.replace(" ", "_")
30
  os.rename(input_vid, new_input_vid)
31
+ csv_path, vid_path = sam_dino_vid(vid_path=new_input_vid,
32
+ text_prompt=grounding_caption,
33
+ box_threshold=box_threshold,
34
+ text_threshold=text_threshold,
35
+ fps_processed=fps_processed,
36
+ scaling_factor=scaling_factor,
37
+ video_options=video_options,
38
+ batch_size=BATCH_SIZE)
39
  global CSV_PATH
40
  CSV_PATH = csv_path
41
  global VID_PATH
 
79
  maximum=120,
80
  value=30,
81
  step=1)
82
+ scaling_factor = gr.Slider(
83
+ label="Downsample Factor",
84
+ info="Adjust the downsample factor. I.e. a value of 0.5 will downsample the pixels in the video by a factor of 2. Note: the higher the number the slower the processing time.",
85
+ minimum=0.1,
86
+ maximum=1.0,
87
+ value=0.5,
88
+ step=0.1
89
+ )
90
  video_options = gr.CheckboxGroup(choices=["Bounding boxes", "Masks"],
91
  label="Video Output Options",
92
  info="Select the options to display in the output video. Note: if masks are selected, runtime will increase.",
 
100
  # download_btn = gr.Button(value="Generate Download", visible=True)
101
  download_file = gr.Files(label="CSV, Video Output", interactive=False)
102
 
103
+ run_btn.click(fn=run_sam_dino, inputs=[input, grounding_caption, box_threshold, text_threshold, fps_processed, scaling_factor, video_options], outputs=[vid])
104
  vid.change(fn=vid_download, outputs=download_file)
105
 
106
  gr.Examples(
107
+ [["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, 1, ["Bounding boxes", "Masks"]]],
108
+ inputs = [input, grounding_caption, box_threshold, text_threshold, fps_processed, scaling_factor, video_options],
109
  outputs = [vid],
110
  fn=run_sam_dino,
111
  cache_examples=True,
dino_sam.py CHANGED
@@ -33,6 +33,7 @@ def sam_dino_vid(
33
  box_threshold: float = 0.35,
34
  text_threshold: float = 0.25,
35
  fps_processed: int = 1,
 
36
  video_options: list[str] = ["Bounding boxes"],
37
  config_path: str = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
38
  weights_path: str = "weights/groundingdino_swint_ogc.pth",
@@ -68,7 +69,7 @@ def sam_dino_vid(
68
  gd_model = load_model(config_path, weights_path, device=device)
69
 
70
  # process video and create a directory of video frames
71
- fps = mp4_to_png(vid_path, frames_dir)
72
 
73
  # get the frame paths for the images to process
74
  frame_filenames = os.listdir(frames_dir)
 
33
  box_threshold: float = 0.35,
34
  text_threshold: float = 0.25,
35
  fps_processed: int = 1,
36
+ scaling_factor: float = 1.0,
37
  video_options: list[str] = ["Bounding boxes"],
38
  config_path: str = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
39
  weights_path: str = "weights/groundingdino_swint_ogc.pth",
 
69
  gd_model = load_model(config_path, weights_path, device=device)
70
 
71
  # process video and create a directory of video frames
72
+ fps = mp4_to_png(vid_path, frames_dir, scaling_factor)
73
 
74
  # get the frame paths for the images to process
75
  frame_filenames = os.listdir(frames_dir)
video_utils.py CHANGED
@@ -4,7 +4,7 @@ import cv2
4
  import os
5
  from tqdm import tqdm
6
 
7
- def mp4_to_png(input_path: str, save_path: str) -> str:
8
  """ Converts mp4 to pngs for each frame of the video.
9
  Args: input_path is the path to the mp4 file, save_path is the directory to save the frames.
10
  Returns: save_path, fps the number of frames per second.
@@ -12,7 +12,7 @@ def mp4_to_png(input_path: str, save_path: str) -> str:
12
  # get frames per second
13
  fps = int(cv2.VideoCapture(input_path).get(cv2.CAP_PROP_FPS))
14
  # run subprocess to convert mp4 to pngs
15
- os.system(f"ffmpeg -i {input_path} -vf fps={fps} {save_path}/frame%08d.png")
16
  return fps
17
 
18
  def frame_to_timestamp(frame_number: int, fps: int):
 
4
  import os
5
  from tqdm import tqdm
6
 
7
+ def mp4_to_png(input_path: str, save_path: str, scale_factor: float) -> str:
8
  """ Converts mp4 to pngs for each frame of the video.
9
  Args: input_path is the path to the mp4 file, save_path is the directory to save the frames.
10
  Returns: save_path, fps the number of frames per second.
 
12
  # get frames per second
13
  fps = int(cv2.VideoCapture(input_path).get(cv2.CAP_PROP_FPS))
14
  # run subprocess to convert mp4 to pngs
15
+ os.system(f"ffmpeg -i {input_path} -vf 'scale=iw*{scale_factor}:ih*{scale_factor}, fps={fps}' {save_path}/frame%08d.png")
16
  return fps
17
 
18
  def frame_to_timestamp(frame_number: int, fps: int):