Spaces:
Build error
Build error
File size: 2,311 Bytes
7971a7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
import urllib.request
import logging
import hashlib
from PIL import Image
import imagehash
from moviepy.editor import VideoFileClip
import numpy as np
from config import FPS, VIDEO_DIRECTORY
def filepath_from_url(url):
"""Return filepath based on a md5 hash of a url."""
return os.path.join(VIDEO_DIRECTORY, hashlib.md5(url.encode()).hexdigest())
def download_video_from_url(url):
"""Download video from url or return md5 hash as video name"""
filepath = filepath_from_url(url)
if not os.path.exists(filepath):
with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
fileout.write(f.read())
logging.info(f"Downloaded video from {url} to {filepath}.")
else:
logging.info(f"Skipping downloading from {url} because {filepath} already exists.")
return filepath
def change_ffmpeg_fps(clip, fps=FPS):
# Hacking the ffmpeg call based on
# https://github.com/Zulko/moviepy/blob/master/moviepy/video/io/ffmpeg_reader.py#L126
import subprocess as sp
cmd = [arg + ",fps=%d" % fps if arg.startswith("scale=") else arg for arg in clip.reader.proc.args]
clip.reader.close()
clip.reader.proc = sp.Popen(cmd, bufsize=clip.reader.bufsize,
stdout=sp.PIPE, stderr=sp.PIPE, stdin=sp.DEVNULL)
clip.fps = clip.reader.fps = fps
clip.reader.lastread = clip.reader.read_frame()
return clip
def compute_hash(frame, hash_size=16):
image = Image.fromarray(np.array(frame))
return imagehash.phash(image, hash_size)
def binary_array_to_uint8s(arr):
bit_string = ''.join(str(1 * x) for l in arr for x in l)
return [int(bit_string[i:i+8], 2) for i in range(0, len(bit_string), 8)]
def compute_hashes(url: str, fps=FPS):
clip = VideoFileClip(download_video_from_url(url))
for index, frame in enumerate(change_ffmpeg_fps(clip, fps).iter_frames()):
# Each frame is a triplet of size (height, width, 3) of the video since it is RGB
# The hash itself is of size (hash_size, hash_size)
# The uint8 version of the hash is of size (hash_size * highfreq_factor,) and represents the hash
hashed = np.array(binary_array_to_uint8s(compute_hash(frame).hash), dtype='uint8')
yield {"frame": 1+index*fps, "hash": hashed} |