dragonSwing committed on
Commit
e0cedf5
1 Parent(s): 54696a3

Use tqdm for processing

Browse files
Files changed (7) hide show
  1. app.py +3 -3
  2. bg_modeling.py +10 -13
  3. convert_to_pdf.py +22 -0
  4. download_video.py +4 -4
  5. frame_differencing.py +8 -13
  6. post_process.py +25 -25
  7. requirements.txt +3 -3
app.py CHANGED
@@ -114,8 +114,8 @@ with gr.Blocks(css="style.css") as demo:
114
  You can browse your video from the local file system, or enter a video URL/YouTube video link to start processing.
115
 
116
  **Note**:
117
- - It will take a bit of time to complete (~40% of the original video length), so stay tuned!
118
- - If the YouTube downloader return errors, you can try again later or download video to your computer and then upload it to the app
119
  - Remember to press Enter if you are using an external URL
120
  """,
121
  elem_id="container",
@@ -176,7 +176,7 @@ with gr.Blocks(css="style.css") as demo:
176
  file_url = gr.Textbox(
177
  value="",
178
  label="Upload your file",
179
- placeholder="Enter a video url or YouTube link",
180
  show_label=False,
181
  )
182
  with gr.Column(scale=1, min_width=160):
 
114
  You can browse your video from the local file system, or enter a video URL/YouTube video link to start processing.
115
 
116
  **Note**:
117
+ - It will take some time to complete (~ half of the original video length), so stay tuned!
118
+ - If the YouTube video link doesn't work, you can try again later or download video to your computer and then upload it to the app
119
  - Remember to press Enter if you are using an external URL
120
  """,
121
  elem_id="container",
 
176
  file_url = gr.Textbox(
177
  value="",
178
  label="Upload your file",
179
+ placeholder="Enter a video url or YouTube video link",
180
  show_label=False,
181
  )
182
  with gr.Column(scale=1, min_width=160):
bg_modeling.py CHANGED
@@ -1,7 +1,7 @@
 
1
  import os
2
- import time
3
  import sys
4
- import cv2
5
  from utils import resize_image_frame
6
 
7
 
@@ -38,7 +38,9 @@ def capture_slides_bg_modeling(
38
  print("Unable to open video file: ", video_path)
39
  sys.exit()
40
 
41
- start = time.time()
 
 
42
  # Loop over subsequent frames.
43
  while cap.isOpened():
44
  ret, frame = cap.read()
@@ -66,21 +68,16 @@ def capture_slides_bg_modeling(
66
 
67
  png_filename = f"{screenshots_count:03}.jpg"
68
  out_file_path = os.path.join(output_dir_path, png_filename)
69
- print(f"Saving file at: {out_file_path}")
70
  cv2.imwrite(out_file_path, orig_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
 
71
 
72
  # p_non_zero >= MIN_PERCENT_THRESH, indicates motion/animations.
73
  # Hence wait till the motion across subsequent frames has settled down.
74
  elif capture_frame and p_non_zero >= MIN_PERCENT_THRESH:
75
  capture_frame = False
 
 
76
 
77
- end_time = time.time()
78
- print("***" * 10, "\n")
79
- print("Statistics:")
80
- print("---" * 10)
81
- print(f"Total Time taken: {round(end_time-start, 3)} secs")
82
- print(f"Total Screenshots captured: {screenshots_count}")
83
- print("---" * 10, "\n")
84
-
85
- # Release Video Capture object.
86
  cap.release()
 
1
+ import cv2
2
  import os
 
3
  import sys
4
+ from tqdm import tqdm
5
  from utils import resize_image_frame
6
 
7
 
 
38
  print("Unable to open video file: ", video_path)
39
  sys.exit()
40
 
41
+ num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
42
+ prog_bar = tqdm(total=num_frames)
43
+
44
  # Loop over subsequent frames.
45
  while cap.isOpened():
46
  ret, frame = cap.read()
 
68
 
69
  png_filename = f"{screenshots_count:03}.jpg"
70
  out_file_path = os.path.join(output_dir_path, png_filename)
 
71
  cv2.imwrite(out_file_path, orig_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
72
+ prog_bar.set_postfix_str(f"Total Screenshots: {screenshots_count}")
73
 
74
  # p_non_zero >= MIN_PERCENT_THRESH, indicates motion/animations.
75
  # Hence wait till the motion across subsequent frames has settled down.
76
  elif capture_frame and p_non_zero >= MIN_PERCENT_THRESH:
77
  capture_frame = False
78
+
79
+ prog_bar.update(1)
80
 
81
+ # Release progress bar and video capture object.
82
+ prog_bar.close()
 
 
 
 
 
 
 
83
  cap.release()
convert_to_pdf.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from utils import convert_slides_to_pdf
3
+
4
+ if __name__ == "__main__":
5
+ parser = argparse.ArgumentParser(
6
+ description="This script is used to convert video frames into slide PDFs."
7
+ )
8
+ parser.add_argument(
9
+ "-f", "--folder", help="Path to the image folder", type=str
10
+ )
11
+ parser.add_argument(
12
+ "-o",
13
+ "--out_path",
14
+ help="Path to the output PDF file. If None, the image directory will be used to store the output file.",
15
+ type=str,
16
+ )
17
+ args = parser.parse_args()
18
+
19
+ img_dir = args.folder
20
+ output_path = args.out_path
21
+
22
+ convert_slides_to_pdf(img_dir, output_path)
download_video.py CHANGED
@@ -15,8 +15,8 @@ def download_video_from_url(url, output_dir=DOWNLOAD_DIR):
15
 
16
  content_type = response.headers.get("content-type")
17
  if "video" not in content_type:
18
- print("The given URL is not a valid video")
19
- return None
20
  file_extension = mimetypes.guess_extension(content_type)
21
 
22
  os.makedirs(output_dir, exist_ok=True)
@@ -32,7 +32,7 @@ def download_video_from_url(url, output_dir=DOWNLOAD_DIR):
32
 
33
  except requests.exceptions.RequestException as e:
34
  print("An error occurred while downloading the video:", str(e))
35
- return None
36
 
37
 
38
  def download_video_from_youtube(url, output_dir=DOWNLOAD_DIR):
@@ -52,7 +52,7 @@ def download_video_from_youtube(url, output_dir=DOWNLOAD_DIR):
52
 
53
  except Exception as e:
54
  print("An error occurred while downloading the video:", str(e))
55
- return None
56
 
57
 
58
  def download_video(url, output_dir=DOWNLOAD_DIR):
 
15
 
16
  content_type = response.headers.get("content-type")
17
  if "video" not in content_type:
18
+ print("The given URL is not a valid video URL")
19
+ return
20
  file_extension = mimetypes.guess_extension(content_type)
21
 
22
  os.makedirs(output_dir, exist_ok=True)
 
32
 
33
  except requests.exceptions.RequestException as e:
34
  print("An error occurred while downloading the video:", str(e))
35
+ return
36
 
37
 
38
  def download_video_from_youtube(url, output_dir=DOWNLOAD_DIR):
 
52
 
53
  except Exception as e:
54
  print("An error occurred while downloading the video:", str(e))
55
+ return
56
 
57
 
58
  def download_video(url, output_dir=DOWNLOAD_DIR):
frame_differencing.py CHANGED
@@ -1,7 +1,7 @@
1
  import cv2
2
  import os
3
- import time
4
  import sys
 
5
 
6
 
7
  def capture_slides_frame_diff(
@@ -24,12 +24,12 @@ def capture_slides_frame_diff(
24
  sys.exit()
25
 
26
  success, first_frame = cap.read()
 
 
27
 
28
  print("Using frame differencing for Background Subtraction...")
29
  print("---" * 10)
30
 
31
- start = time.time()
32
-
33
  # The 1st frame should always be present in the output directory.
34
  # Hence capture and save the 1st frame.
35
  if success:
@@ -42,10 +42,10 @@ def capture_slides_frame_diff(
42
 
43
  filename = f"{screenshots_count:03}.jpg"
44
  out_file_path = os.path.join(output_dir_path, filename)
45
- print(f"Saving file at: {out_file_path}")
46
 
47
  # Save frame.
48
  cv2.imwrite(out_file_path, first_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
 
49
 
50
  # Loop over subsequent frames.
51
  while cap.isOpened():
@@ -79,18 +79,13 @@ def capture_slides_frame_diff(
79
 
80
  filename = f"{screenshots_count:03}.jpg"
81
  out_file_path = os.path.join(output_dir_path, filename)
82
- print(f"Saving file at: {out_file_path}")
83
 
84
  cv2.imwrite(out_file_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
 
85
 
86
  prev_frame = curr_frame
 
87
 
88
- end_time = time.time()
89
- print("***" * 10, "\n")
90
- print("Statistics:")
91
- print("---" * 5)
92
- print(f"Total Time taken: {round(end_time-start, 3)} secs")
93
- print(f"Total Screenshots captured: {screenshots_count}")
94
- print("---" * 10, "\n")
95
-
96
  cap.release()
 
1
  import cv2
2
  import os
 
3
  import sys
4
+ from tqdm import tqdm
5
 
6
 
7
  def capture_slides_frame_diff(
 
24
  sys.exit()
25
 
26
  success, first_frame = cap.read()
27
+ num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
28
+ prog_bar = tqdm(total=num_frames)
29
 
30
  print("Using frame differencing for Background Subtraction...")
31
  print("---" * 10)
32
 
 
 
33
  # The 1st frame should always be present in the output directory.
34
  # Hence capture and save the 1st frame.
35
  if success:
 
42
 
43
  filename = f"{screenshots_count:03}.jpg"
44
  out_file_path = os.path.join(output_dir_path, filename)
 
45
 
46
  # Save frame.
47
  cv2.imwrite(out_file_path, first_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
48
+ prog_bar.update(1)
49
 
50
  # Loop over subsequent frames.
51
  while cap.isOpened():
 
79
 
80
  filename = f"{screenshots_count:03}.jpg"
81
  out_file_path = os.path.join(output_dir_path, filename)
 
82
 
83
  cv2.imwrite(out_file_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
84
+ prog_bar.set_postfix_str(f"Total Screenshots: {screenshots_count}")
85
 
86
  prev_frame = curr_frame
87
+ prog_bar.update(1)
88
 
89
+ # Release progress bar and video capture object.
90
+ prog_bar.close()
 
 
 
 
 
 
91
  cap.release()
post_process.py CHANGED
@@ -2,6 +2,7 @@ import imagehash
2
  import os
3
  from collections import deque
4
  from PIL import Image
 
5
 
6
 
7
  def find_similar_images(
@@ -16,31 +17,30 @@ def find_similar_images(
16
 
17
  print("---" * 5, "Finding similar files", "---" * 5)
18
 
19
- for file in snapshots_files:
20
- read_file = Image.open(os.path.join(base_dir, file))
21
- comp_hash = hashfunc(read_file, hash_size=hash_size)
22
- duplicate = False
23
-
24
- if comp_hash not in hash_dict:
25
- hash_dict[comp_hash] = file
26
- # Compare with hash queue to find out potential duplicates
27
- for img_hash in hash_queue:
28
- if img_hash - comp_hash <= threshold:
29
- duplicate = True
30
- break
31
-
32
- if not duplicate:
33
- hash_queue.append(comp_hash)
34
- else:
35
- duplicate = True
36
-
37
- if duplicate:
38
- print("Duplicate file: ", file)
39
- duplicates.append(file)
40
- num_duplicates += 1
41
-
42
- print("\nTotal duplicate files:", num_duplicates)
43
- print("-----" * 10)
44
  return hash_dict, duplicates
45
 
46
 
 
2
  import os
3
  from collections import deque
4
  from PIL import Image
5
+ from tqdm import tqdm
6
 
7
 
8
  def find_similar_images(
 
17
 
18
  print("---" * 5, "Finding similar files", "---" * 5)
19
 
20
+ with tqdm(snapshots_files) as t:
21
+ for file in t:
22
+ read_file = Image.open(os.path.join(base_dir, file))
23
+ comp_hash = hashfunc(read_file, hash_size=hash_size)
24
+ duplicate = False
25
+
26
+ if comp_hash not in hash_dict:
27
+ hash_dict[comp_hash] = file
28
+ # Compare with hash queue to find out potential duplicates
29
+ for img_hash in hash_queue:
30
+ if img_hash - comp_hash <= threshold:
31
+ duplicate = True
32
+ break
33
+
34
+ if not duplicate:
35
+ hash_queue.append(comp_hash)
36
+ else:
37
+ duplicate = True
38
+
39
+ if duplicate:
40
+ duplicates.append(file)
41
+ num_duplicates += 1
42
+ t.set_postfix_str(f"Duplicate files: {num_duplicates}")
43
+
 
44
  return hash_dict, duplicates
45
 
46
 
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- opencv-contrib-python==4.7.0.72
2
  numpy
3
  Pillow
4
  scipy
5
  six
6
  ImageHash
7
- imutils
8
  img2pdf
 
9
  pytube
10
  validators
11
- requests
 
 
 
1
  numpy
2
  Pillow
3
  scipy
4
  six
5
  ImageHash
 
6
  img2pdf
7
+ imutils
8
  pytube
9
  validators
10
+ requests
11
+ tqdm