Iskaj committed on
Commit
bfd4fcf
1 Parent(s): 7b125d1

update to latest with Daan's changes

Browse files
Testing Cropping.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -90,7 +90,8 @@ transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to
90
  with open('apb2022.json', "r") as json_file:
91
  TARGET_ENTRIES = json.load(json_file)
92
 
93
- EXAMPLE_VIDEO_URLS = ["https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
 
94
  "https://video.twimg.com/amplify_video/1575576025651617796/vid/480x852/jP057nPfPJSUM0kR.mp4?tag=14",
95
  "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download"]
96
 
 
90
  with open('apb2022.json', "r") as json_file:
91
  TARGET_ENTRIES = json.load(json_file)
92
 
93
+ EXAMPLE_VIDEO_URLS = ["https://www.youtube.com/watch?v=qIaqMqMweM4",
94
+ "https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
95
  "https://video.twimg.com/amplify_video/1575576025651617796/vid/480x852/jP057nPfPJSUM0kR.mp4?tag=14",
96
  "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download"]
97
 
config.py CHANGED
@@ -5,5 +5,5 @@ VIDEO_DIRECTORY = tempfile.gettempdir()
5
 
6
  FPS = 5
7
  MIN_DISTANCE = 4
8
- MAX_DISTANCE = 30 # Used to be 30
9
  ROLLING_WINDOW_SIZE = 10
 
5
 
6
  FPS = 5
7
  MIN_DISTANCE = 4
8
+ MAX_DISTANCE = 50 # Used to be 30
9
  ROLLING_WINDOW_SIZE = 10
data/0fd24e056447f6cd068f40ba2e41828c.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d304bf65b174cc6d446536e183062988d13450eb27d6e1490b6024813cfca61
3
  size 18035
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beba3b8f175ccd3b8fc74c32524d013f85dc0c19fcd467d8e13d75381886f4a9
3
  size 18035
data/1de5e559d3d190ca09ac3fa6b40a0138.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a81da7184d4a73b80512031805e28d76adbf45c508b16cde173a9cc980104b48
3
  size 5395
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31a5c8e5d2b8c433f2a570059ee680c1fddd22118915cbe3e964f9019730d238
3
  size 5395
data/265732a300bbe18c4d39df108fe0c121.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3d30c03a7817befd97b670f124e80bfcd2c6862a3bf3d0a57bdb7c5cad7122b
3
  size 284155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab0ada81fdd3e5ab2e7e3d276c38dabf052c2c0d289dd3021da0406b892bbbb
3
  size 284155
data/2909574cb590231c5d1028032bd849a8.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8634dffda2a11307f55712a079bba4e0e38895f9f3836d26c5a32a051580cfcd
3
  size 6287235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060b6cfb3c748275d824866bed83033a9b75234d215c36d9a04454d41c7d6697
3
  size 6287235
data/2f71b6c1d5c91c4b73eef2ebd05bd848.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4357ef337a87f67f43283e770eb444c2261f3233c64d6aef04f671a1b4237780
3
  size 3725195
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f8df69be9a6eb7c614791f6d06d0c341d7f063d74031e064fafa99c08df6c0c
3
  size 3725195
data/75617545ba72efdc9a7b2b140184a9c6.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c28e09d9cee38776ad698e56209573f2e3f9f67a533b10b9001a25b39e34321e
3
  size 3305875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d76445b9fad00277f34890930f3aae6d6e078b3308ea18c83ad065db0d1754
3
  size 3305875
data/9c0a24aa59358ee014ded54c88eaa6c1.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35786ba354b46631f880ff468b04f234dd0a8fff1ea159385d9a252272a53595
3
  size 294515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e714afd9b69d1104adb7192352d6a3b8070510f03840fbb2fc9f2b95ccffac
3
  size 294515
data/a3efa8f2e01e0964d7987e6b1e4b3b44.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab33d752701be55ece43a841e0318a7a8eb717c9fb3c09c9e9cccd57528f65b
3
  size 5395
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54185972343a709462b8e72ad79d0e7cb3bf33aff65c92f5d65ca8b8c0e73c39
3
  size 5395
data/b3cf94671de7aadc22970483150aafd5.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30157e51167c48c9e9da8400bd2efb4db730cf99060ec6b958e593caf4b934ca
3
  size 3075
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82e10462195550af464d3e4c2488712dd01b3317dd275c9caf6cea94ad80e4a3
3
  size 3075
data/d0369414718aa9573ff9d009f4f61cc0.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da68a19cadf2abcba5da6660393547599ae3a70f73c683ce236962f44a6f24ce
3
  size 6520355
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96c567a4fb8b625441781820ebdc7db7835fc0a73dd6c54c0d7ad540428038d3
3
  size 6520355
requirements.txt CHANGED
@@ -6,4 +6,6 @@ faiss-cpu==1.7.2
6
  Pillow==9.2.0
7
  kats==0.2.0
8
  seaborn==0.12.0
9
- scipy==1.7.3
 
 
 
6
  Pillow==9.2.0
7
  kats==0.2.0
8
  seaborn==0.12.0
9
+ scipy==1.7.3
10
+ pytube==12.1.0
11
+ pytube3==9.6.4
videohash.py CHANGED
@@ -3,11 +3,14 @@ import urllib.request
3
  import shutil
4
  import logging
5
  import hashlib
 
6
 
7
  from PIL import Image
8
  import imagehash
9
  from moviepy.editor import VideoFileClip
10
- import numpy as np
 
 
11
 
12
  from config import FPS, VIDEO_DIRECTORY
13
 
@@ -18,12 +21,26 @@ def filepath_from_url(url):
18
 
19
  def download_video_from_url(url):
20
  """Download video from url or return md5 hash as video name"""
21
- # TODO: Make work for Google link
22
  filepath = filepath_from_url(url)
 
 
23
  if not os.path.exists(filepath):
 
 
 
 
 
 
 
 
 
 
 
24
  with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
25
- shutil.copyfileobj(f, fileout, length=16*1024)
26
- logging.info(f"Downloaded video from {url} to {filepath}.")
 
27
  else:
28
  logging.info(f"Skipping downloading from {url} because {filepath} already exists.")
29
  return filepath
@@ -41,6 +58,13 @@ def change_ffmpeg_fps(clip, fps=FPS):
41
  clip.reader.lastread = clip.reader.read_frame()
42
  return clip
43
 
 
 
 
 
 
 
 
44
  def compute_hash(frame, hash_size=16):
45
  image = Image.fromarray(np.array(frame))
46
  return imagehash.phash(image, hash_size)
@@ -51,10 +75,11 @@ def binary_array_to_uint8s(arr):
51
 
52
  def compute_hashes(url: str, fps=FPS):
53
  try:
54
- clip = VideoFileClip(download_video_from_url(url))
 
55
  except IOError:
56
  logging.warn(f"Falling back to direct streaming from {url} because the downloaded video failed.")
57
- clip = VideoFileClip(url)
58
 
59
  for index, frame in enumerate(change_ffmpeg_fps(clip, fps).iter_frames()):
60
  # Each frame is a triplet of size (height, width, 3) of the video since it is RGB
 
3
  import shutil
4
  import logging
5
  import hashlib
6
+ import time
7
 
8
  from PIL import Image
9
  import imagehash
10
  from moviepy.editor import VideoFileClip
11
+ from moviepy.video.fx.all import crop
12
+ import numpy as np
13
+ from pytube import YouTube
14
 
15
  from config import FPS, VIDEO_DIRECTORY
16
 
 
21
 
22
  def download_video_from_url(url):
23
  """Download video from url or return md5 hash as video name"""
24
+ start = time.time()
25
  filepath = filepath_from_url(url)
26
+
27
+ # Check if it exists already
28
  if not os.path.exists(filepath):
29
+ # For YouTube links
30
+ if url.startswith('https://www.youtube.com') or url.startswith('youtube.com') or url.startswith('http://www.youtube.com'):
31
+ file_dir = '/'.join(x for x in filepath.split('/')[:-1])
32
+ filename = filepath.split('/')[-1]
33
+ logging.info(f"file_dir = {file_dir}")
34
+ logging.info(f"filename = {filename}")
35
+ YouTube(url).streams.get_highest_resolution().download(file_dir, skip_existing = False, filename = filename)
36
+ logging.info(f"Downloaded YouTube video from {url} to {filepath} in {time.time() - start:.1f} seconds.")
37
+ return filepath
38
+
39
+ # Works for basically all links, except youtube
40
  with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
41
+ logging.info(f"Starting copyfileobj on {f}")
42
+ shutil.copyfileobj(f, fileout, length=16*1024*1024)
43
+ logging.info(f"Downloaded video from {url} to {filepath} in {time.time() - start:.1f} seconds.")
44
  else:
45
  logging.info(f"Skipping downloading from {url} because {filepath} already exists.")
46
  return filepath
 
58
  clip.reader.lastread = clip.reader.read_frame()
59
  return clip
60
 
61
+ def crop_video(clip, crop_percentage=0.75, w=224, h=224):
62
+ # Original width and height- which combined with crop_percentage determines the size of the new video
63
+ ow, oh = clip.size
64
+
65
+ logging.info(f"Cropping and resizing video to ({w}, {h})")
66
+ return crop(clip, x_center=ow/2, y_center=oh/2, width=int(ow*crop_percentage), height=int(crop_percentage*oh)).resize((w,h))
67
+
68
  def compute_hash(frame, hash_size=16):
69
  image = Image.fromarray(np.array(frame))
70
  return imagehash.phash(image, hash_size)
 
75
 
76
  def compute_hashes(url: str, fps=FPS):
77
  try:
78
+ filepath = download_video_from_url(url)
79
+ clip = crop_video(VideoFileClip(filepath))
80
  except IOError:
81
  logging.warn(f"Falling back to direct streaming from {url} because the downloaded video failed.")
82
+ clip = crop_video(VideoFileClip(url))
83
 
84
  for index, frame in enumerate(change_ffmpeg_fps(clip, fps).iter_frames()):
85
  # Each frame is a triplet of size (height, width, 3) of the video since it is RGB