# Source: Hugging Face Spaces - GenAIJake / FrameVis (Space status: Sleeping)
# File size: 21,682 Bytes
# Revision: d80a719

#
#  Project     FrameVis - Video Frame Visualizer Script
#  @author     David Madison
#  @link       github.com/dmadison/FrameVis
#  @version    v1.0.1
#  @license    MIT - Copyright (c) 2019 David Madison
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

import cv2
import numpy as np
import argparse
from enum import Enum, auto
import time


class FrameVis:
	"""
	Reads a video file and outputs an image comprised of n resized frames, spread evenly throughout the file.

	The class keeps no per-instance state. The `default_*` class attributes
	below are the fallback values used by `visualize()`.
	"""

	default_frame_height = None  # auto, or in pixels
	default_frame_width = None  # auto, or in pixels
	default_concat_size = 1  # size of concatenated frame if automatically calculated, in pixels
	default_direction = "horizontal"  # left to right

	def visualize(self, source, nframes, height=default_frame_height, width=default_frame_width,
		direction=default_direction, trim=False, quiet=True):
		"""
		Reads a video file and outputs an image comprised of n resized frames, spread evenly throughout the file.

		Parameters:
			source (str): filepath to source video file
			nframes (int): number of frames to process from the video
			height (int): height of each frame, in pixels (None to auto-calculate)
			width (int): width of each frame, in pixels (None to auto-calculate)
			direction (str): direction to concatenate frames ("horizontal" or "vertical")
			trim (bool): detect hard matting (letterboxing / pillarboxing) and crop it
				out of every frame before resizing
			quiet (bool): suppress console messages

		Returns:
			visualization image as numpy array

		Raises:
			FileNotFoundError: the source video could not be opened
			ValueError: nframes, height, width or direction is invalid
			IOError: a frame could not be read from the video
		"""

		video = cv2.VideoCapture(source)  # open video file
		try:  # the capture is released in 'finally', including on every error path
			if not video.isOpened():
				raise FileNotFoundError("Source Video Not Found")

			if not quiet:
				print("")  # create space from script call line

			# calculate keyframe interval
			video_total_frames = video.get(cv2.CAP_PROP_FRAME_COUNT)  # total frame count from metadata (float)
			if not isinstance(nframes, int) or nframes < 1:
				raise ValueError("Number of frames must be a positive integer")
			if nframes > video_total_frames:
				raise ValueError("Requested frame count larger than total available ({:.0f})".format(video_total_frames))
			keyframe_interval = video_total_frames / nframes  # number of frames between captures

			# grab frame for dimension calculations
			success, image = video.read()  # get first frame
			if not success:
				raise IOError("Cannot read from video file")
			source_height, source_width = image.shape[0], image.shape[1]

			# calculate letterbox / pillarbox trimming, if specified
			matte_type = 0  # bit 0: letterboxing present, bit 1: pillarboxing present
			cropping_bounds = None
			crop_width, crop_height = source_width, source_height
			if trim:
				if not quiet:
					print("Trimming enabled, checking matting... ", end="", flush=True)

				# 10 frame samples, seen as matted if an axis has all color channels at 3 / 255 or lower (avg)
				success, cropping_bounds = MatteTrimmer.determine_video_bounds(source, 10, 3)

				if success:  # only calculate cropping if bounds are valid
					crop_width = cropping_bounds[1][0] - cropping_bounds[0][0] + 1
					crop_height = cropping_bounds[1][1] - cropping_bounds[0][1] + 1

					if crop_height != source_height:  # letterboxing
						matte_type += 1
					if crop_width != source_width:  # pillarboxing
						matte_type += 2

				if not quiet:
					if matte_type == 0:
						print("no matting detected")
					elif matte_type == 1:
						print("letterboxing detected, cropping {} px from the top and bottom".format(int((source_height - crop_height) / 2)))
					elif matte_type == 2:
						print("pillarboxing detected, trimming {} px from the sides".format(int((source_width - crop_width) / 2)))
					elif matte_type == 3:
						print("multiple matting detected - cropping ({}, {}) to ({}, {})".format(source_width, source_height, crop_width, crop_height))

			# calculate height
			if height is None:  # auto-calculate
				if direction == "horizontal":  # non-concat, use video size
					height = crop_height if matte_type & 1 == 1 else source_height
				else:  # concat, use default value
					height = FrameVis.default_concat_size
			elif not isinstance(height, int) or height < 1:
				raise ValueError("Frame height must be a positive integer")

			# calculate width
			if width is None:  # auto-calculate
				if direction == "vertical":  # non-concat, use video size
					width = crop_width if matte_type & 2 == 2 else source_width
				else:  # concat, use default value
					width = FrameVis.default_concat_size
			elif not isinstance(width, int) or width < 1:
				raise ValueError("Frame width must be a positive integer")

			# assign direction function and calculate output size
			if direction == "horizontal":
				concatenate = cv2.hconcat
				output_width = width * nframes
				output_height = height
			elif direction == "vertical":
				concatenate = cv2.vconcat
				output_width = width
				output_height = height * nframes
			else:
				raise ValueError("Invalid direction specified")

			if not quiet:
				aspect_ratio = output_width / output_height
				print("Visualizing \"{}\" - {} by {} ({:.2f}), from {} frames (every {:.2f} seconds)"
					.format(source, output_width, output_height, aspect_ratio, nframes, FrameVis.interval_from_nframes(source, nframes)))

			# set up for the frame processing loop
			next_keyframe = keyframe_interval / 2  # frame number for the next frame grab, starting evenly offset from start/end
			frames = []  # resized frames, joined once after the loop instead of re-copying the output per frame
			progress = ProgressBar("Processing:")

			for finished_frames in range(1, nframes + 1):
				frame_index = int(next_keyframe)
				video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)  # move cursor to next sampled frame
				success, image = video.read()  # read the next frame

				if not success:
					raise IOError("Cannot read from video file (frame {} out of {:.0f})".format(frame_index, video_total_frames))

				if matte_type != 0:  # crop out matting, if specified and matting is present
					image = MatteTrimmer.crop_image(image, cropping_bounds)

				frames.append(cv2.resize(image, (width, height)))  # resize to output size
				next_keyframe += keyframe_interval  # set next frame capture time, maintaining floats

				if not quiet:
					progress.write(finished_frames / nframes)  # print progress bar to the console

			# a single frame needs no concatenation
			return frames[0] if len(frames) == 1 else concatenate(frames)
		finally:
			video.release()  # close video capture

	@staticmethod
	def average_image(image, direction):
		"""
		Averages the colors in an axis across an entire image

		Parameters:
			image (arr x.y.c): image as 3-dimensional numpy array
			direction (str): direction to average frames ("horizontal" or "vertical")

		Returns:
			image, with pixel data averaged along provided axis

		Raises:
			ValueError: direction is neither "horizontal" nor "vertical"
		"""

		height, width, depth = image.shape

		if direction == "horizontal":
			scale_height = 1
			scale_width = width
		elif direction == "vertical":
			scale_height = height
			scale_width = 1
		else:
			raise ValueError("Invalid direction specified")

		image = cv2.resize(image, (scale_width, scale_height))  # scale down to '1', averaging values
		image = cv2.resize(image, (width, height))  # scale back up to size

		return image

	@staticmethod
	def _blur_kernel(direction, blur_amount):
		"""
		Builds the normalized convolution kernel used by motion_blur(): a single
		line of equal weights through the center of a square matrix.

		Parameters:
			direction (str): direction of stacked images ("horizontal" or "vertical")
			blur_amount (int): kernel size, in pixels

		Returns:
			blur_amount x blur_amount numpy float array whose entries sum to 1

		Raises:
			ValueError: blur_amount is not a positive integer, or direction is invalid
		"""
		if not isinstance(blur_amount, (int, np.integer)) or blur_amount < 1:
			raise ValueError("Blur amount must be a positive integer")
		if direction not in ("horizontal", "vertical"):
			raise ValueError("Invalid direction specified")

		kernel = np.zeros((blur_amount, blur_amount))  # create convolution kernel
		center = (blur_amount - 1) // 2

		if direction == "horizontal":
			kernel[:, center] = 1.0  # fill center column (blurring vertically for horizontal concat)
		else:
			kernel[center, :] = 1.0  # fill center row (blurring horizontally for vertical concat)

		kernel /= blur_amount  # normalize kernel matrix
		return kernel

	@staticmethod
	def motion_blur(image, direction, blur_amount):
		"""
		Blurs the pixels in a given axis across an entire image.

		Parameters:
			image (arr x.y.c): image as 3-dimensional numpy array
			direction (str): direction of stacked images for blurring ("horizontal" or "vertical")
			blur_amount (int): how much to blur the image, as the convolution kernel size

		Returns:
			image, with pixel data blurred along provided axis

		Raises:
			ValueError: blur_amount is not a positive integer, or direction is invalid
		"""
		kernel = FrameVis._blur_kernel(direction, blur_amount)  # validates arguments before touching the image

		return cv2.filter2D(image, -1, kernel)  # filter using kernel with same depth as source

	@staticmethod
	def _video_properties(source):
		"""
		Opens a video file, reads its metadata and closes it again.

		Parameters:
			source (str): filepath to source video file

		Returns:
			tuple of (frame count, frames per second), both as reported by OpenCV

		Raises:
			FileNotFoundError: the source video could not be opened
		"""
		video = cv2.VideoCapture(source)  # open video file
		try:
			if not video.isOpened():
				raise FileNotFoundError("Source Video Not Found")
			return video.get(cv2.CAP_PROP_FRAME_COUNT), video.get(cv2.CAP_PROP_FPS)
		finally:
			video.release()  # close video capture

	@staticmethod
	def nframes_from_interval(source, interval):
		"""
		Calculates the number of frames available in a video file for a given capture interval

		Parameters:
			source (str): filepath to source video file
			interval (float): capture frame every i seconds

		Returns:
			number of frames per time interval (int)

		Raises:
			ValueError: interval is not positive, or the video reports no framerate
			FileNotFoundError: the source video could not be opened
		"""
		if interval <= 0:
			raise ValueError("Capture interval must be greater than zero")

		frame_count, fps = FrameVis._video_properties(source)
		if fps <= 0:
			raise ValueError("Source video reports an invalid framerate ({})".format(fps))

		duration = frame_count / fps  # duration of the video, in seconds

		return int(round(duration / interval))  # number of frames per interval

	@staticmethod
	def interval_from_nframes(source, nframes):
		"""
		Calculates the capture interval, in seconds, for a video file given the
		number of frames to capture

		Parameters:
			source (str): filepath to source video file
			nframes (int): number of frames to capture from the video file

		Returns:
			time interval (seconds) between frame captures (float)

		Raises:
			ValueError: nframes is not positive, or the video reports no framerate
			FileNotFoundError: the source video could not be opened
		"""
		if nframes <= 0:
			raise ValueError("Number of frames must be a positive integer")

		frame_count, fps = FrameVis._video_properties(source)
		if fps <= 0:
			raise ValueError("Source video reports an invalid framerate ({})".format(fps))

		keyframe_interval = frame_count / nframes  # calculate number of frames between captures

		return keyframe_interval / fps  # seconds between captures


class MatteTrimmer:
	"""
	Functions for finding and removing black mattes around video frames

	Bounds are passed around as a pair of corner coordinates in the form
	[(X,Y), (X,Y)] - top-left first, bottom-right second - and both corners
	are inclusive (they index the first and last pixel that hold image data).
	"""

	@staticmethod
	def find_matrix_edges(matrix, threshold):
		"""
		Finds the start and end points of a 1D array above a given threshold

		Parameters:
			matrix (arr, 1.x): 1D array (or list / tuple) of data to check
			threshold (value): valid data is above this trigger level

		Returns:
			tuple with the array indices of data bounds, start and end (both
			inclusive), or (None, None) if no value is above the threshold

		Raises:
			ValueError: the provided data is not one-dimensional
		"""

		data = np.asarray(matrix)  # lists and tuples have no '.shape', so normalize first
		if data.ndim != 1:
			raise ValueError("Provided matrix is not the right size (must be 1D)")

		above = np.flatnonzero(data > threshold)  # indices of every value over the threshold
		if above.size == 0:
			return (None, None)

		return (int(above[0]), int(above[-1]))

	@staticmethod
	def find_larger_bound(first, second):
		"""
		Takes two sets of diagonal rectangular boundary coordinates and determines
		the set of rectangular boundary coordinates that contains both

		Parameters:
			first  (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)]
			second (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)]

			Where for both arrays the first coordinate is in the top left-hand corner,
			and the second coordinate is in the bottom right-hand corner.

		Returns:
			numpy coordinate matrix containing both of the provided boundaries
		"""
		left_edge = min(first[0][0], second[0][0])
		top_edge = min(first[0][1], second[0][1])

		right_edge = max(first[1][0], second[1][0])
		bottom_edge = max(first[1][1], second[1][1])

		return np.array([[left_edge, top_edge], [right_edge, bottom_edge]])

	@staticmethod
	def valid_bounds(bounds):
		"""
		Checks if the frame bounds are a valid format

		Parameters:
			bounds (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)],
				or None if no bounds were found

		Returns:
			True or False
		"""

		if bounds is None:
			return False  # nothing was detected at all

		try:
			(left, top), (right, bottom) = bounds
		except (TypeError, ValueError):
			return False  # not a 2 x 2 coordinate pair

		if any(edge is None for edge in (left, top, right, bottom)):
			return False  # not a number

		if left > right or top > bottom:
			return False  # left > right or top > bottom

		return True

	@staticmethod
	def determine_image_bounds(image, threshold):
		"""
		Determines if there are any hard mattes (black bars) surrounding
		an image on either the top (letterboxing) or the sides (pillarboxing)

		Parameters:
			image (arr, x.y.c): image as 3-dimensional numpy array
			threshold (8-bit int): min color channel value to judge as 'image present'

		Returns:
			success (bool): True or False if the bounds are valid
			image_bounds: numpy coordinate matrix with the two opposite corners of the
				image bounds, in the form [(X,Y), (X,Y)]
		"""

		height, width, depth = image.shape

		# check for letterboxing
		horizontal_sums = np.sum(image, axis=(1,2))  # sum all color channels across all rows
		hthreshold = (threshold * width * depth)  # must be below every pixel having a value of "threshold" in every channel
		top, bottom = MatteTrimmer.find_matrix_edges(horizontal_sums, hthreshold)

		# check for pillarboxing
		vertical_sums = np.sum(image, axis=(0,2))  # sum all color channels across all columns
		vthreshold = (threshold * height * depth)  # must be below every pixel having a value of "threshold" in every channel
		left, right = MatteTrimmer.find_matrix_edges(vertical_sums, vthreshold)

		image_bounds = np.array([[left, top], [right, bottom]])

		return MatteTrimmer.valid_bounds(image_bounds), image_bounds

	@staticmethod
	def determine_video_bounds(source, nsamples, threshold):
		"""
		Determines if any matting exists in a video source

		Parameters:
			source (str): filepath to source video file
			nsamples (int): number of frames from the video to determine bounds,
				evenly spaced throughout the video
			threshold (8-bit int): min color channel value to judge as 'image present'

		Returns:
			success (bool): True or False if the bounds are valid
			video_bounds: numpy coordinate matrix with the two opposite corners of the
				video bounds, in the form [(X,Y), (X,Y)], or None if no sampled
				frame contained any image data

		Raises:
			ValueError: nsamples is not a positive integer
			FileNotFoundError: the source video could not be opened
			IOError: a frame could not be read from the video
		"""
		if not isinstance(nsamples, int) or nsamples < 1:
			raise ValueError("Number of samples must be a positive integer")

		video = cv2.VideoCapture(source)  # open video file
		try:  # the capture is released in 'finally', including on every error path
			if not video.isOpened():
				raise FileNotFoundError("Source Video Not Found")

			video_total_frames = video.get(cv2.CAP_PROP_FRAME_COUNT)  # retrieve total frame count from metadata
			keyframe_interval = video_total_frames / nsamples  # calculate number of frames between captures

			# open video to make results consistent with visualizer
			# (this also GREATLY increases the read speed? no idea why)
			success, image = video.read()  # get first frame
			if not success:
				raise IOError("Cannot read from video file")

			next_keyframe = keyframe_interval / 2  # frame number for the next frame grab, starting evenly offset from start/end
			video_bounds = None

			for _ in range(nsamples):
				video.set(cv2.CAP_PROP_POS_FRAMES, int(next_keyframe))  # move cursor to next sampled frame
				success, image = video.read()  # read the next frame

				# advance before any 'continue' below, otherwise an invalid (e.g. all
				# black) frame would be sampled again on every remaining iteration
				next_keyframe += keyframe_interval

				if not success:
					raise IOError("Cannot read from video file")

				success, frame_bounds = MatteTrimmer.determine_image_bounds(image, threshold)

				if not success:
					continue  # don't compare bounds, frame bounds are invalid

				video_bounds = frame_bounds if video_bounds is None else MatteTrimmer.find_larger_bound(video_bounds, frame_bounds)
		finally:
			video.release()  # close video capture

		return MatteTrimmer.valid_bounds(video_bounds), video_bounds

	@staticmethod
	def crop_image(image, bounds):
		"""
		Crops a provided image by the coordinate bounds pair provided.

		Parameters:
			image (arr, x.y.c): image as 3-dimensional numpy array
			bounds (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)],
				with both corners inclusive

		Returns:
			image as 3-dimensional numpy array, cropped to the coordinate bounds
		"""
		(left, top), (right, bottom) = bounds

		# slice ends are exclusive, so add one to keep the last row / column of image data
		return image[top:bottom + 1, left:right + 1]

class ProgressBar:
	"""
	Single-line console progress bar, optionally followed by the elapsed time

	Args:
		pre (str): label printed in front of the bar; a tab separator is
			appended to it when it is not empty
		bar_length (int): width of the bar itself, in characters
		print_elapsed (bool): whether to print the time elapsed since creation

	Attributes:
		pre (str): label printed in front of the bar, including its separator
		bar_length (int): width of the bar itself, in characters
		print_elapsed (bool): whether to print the time elapsed since creation
	"""

	def __init__(self, pre="", bar_length=25, print_elapsed=True):
		self.pre = pre if pre == "" else (pre + '\t')  # label and bar are tab-separated
		self.bar_length = bar_length
		self.print_elapsed = print_elapsed

		# the reference time is only recorded when it is going to be displayed
		if print_elapsed:
			self.__start_time = time.time()  # unix seconds

	def write(self, percent):
		"""Prints a progress bar to the console based on the input percentage (float)."""
		# stay on the same line (carriage return) until the bar is complete
		if percent < 1.0:
			line_end = '\r'
		else:
			line_end = '\n'

		n_filled = int(round(self.bar_length * percent))
		n_empty = self.bar_length - n_filled
		bar = "".join(("#" * n_filled, " " * n_empty))

		elapsed_text = ""
		if self.print_elapsed:
			seconds = time.time() - self.__start_time
			clock = time.strftime("%H:%M:%S", time.gmtime(seconds))
			elapsed_text = "\tTime Elapsed: {}".format(clock)

		line = "{}[{}]\t{:.2%}{}".format(self.pre, bar, percent, elapsed_text)
		print(line, end=line_end, flush=True)
		


def main():
	"""
	Command-line entry point: parses the arguments, builds the visualization,
	applies the optional postprocessing step and saves the result to disk.

	Raises:
		IOError: the output image could not be written to the destination path
	"""
	parser = argparse.ArgumentParser(description="video frame visualizer and movie barcode generator", add_help=False)  # removing help so I can use '-h' for height

	parser.add_argument("source", help="file path for the video file to be visualized", type=str)
	parser.add_argument("destination", help="file path output for the final image", type=str)
	parser.add_argument("-n", "--nframes", help="the number of frames in the visualization", type=int)
	parser.add_argument("-i", "--interval", help="interval between frames for the visualization", type=float)
	parser.add_argument("-h", "--height", help="the height of each frame, in pixels", type=int, default=FrameVis.default_frame_height)
	parser.add_argument("-w", "--width", help="the output width of each frame, in pixels", type=int, default=FrameVis.default_frame_width)
	parser.add_argument("-d", "--direction", help="direction to concatenate frames, horizontal or vertical", type=str, \
		choices=["horizontal", "vertical"],	default=FrameVis.default_direction)
	parser.add_argument("-t", "--trim", help="detect and trim any hard matting (letterboxing or pillarboxing)", action='store_true', default=False)
	parser.add_argument("-a", "--average", help="average colors for each frame", action='store_true', default=False)
	parser.add_argument("-b", "--blur", help="apply motion blur to the frames (kernel size)", type=int, nargs='?', const=100, default=0)
	parser.add_argument("-q", "--quiet", help="mute console outputs", action='store_true', default=False)
	parser.add_argument("--help", action="help", help="show this help message and exit")

	args = parser.parse_args()

	# check number of frames arguments (an explicit --nframes takes priority over --interval)
	if args.nframes is None:
		if args.interval is None:
			parser.error("You must provide either an --(n)frames or --(i)nterval argument")
		if args.interval <= 0:
			parser.error("The --(i)nterval argument must be greater than zero")
		args.nframes = FrameVis.nframes_from_interval(args.source, args.interval)  # calculate nframes from interval

	# check postprocessing arguments
	if args.blur < 0:
		parser.error("The --(b)lur kernel size must be a positive integer")
	if args.average and args.blur != 0:
		parser.error("Cannot (a)verage and (b)lur, you must choose one or the other")

	fv = FrameVis()

	output_image = fv.visualize(args.source, args.nframes, height=args.height, width=args.width, \
		direction=args.direction, trim=args.trim, quiet=args.quiet)

	# postprocess
	if args.average or args.blur != 0:
		if args.average:
			if not args.quiet:
				print("Averaging frame colors... ", end="", flush=True)
			output_image = fv.average_image(output_image, args.direction)

		if args.blur != 0:
			if not args.quiet:
				print("Adding motion blur to final frame... ", end="", flush=True)
			output_image = fv.motion_blur(output_image, args.direction, args.blur)

		if not args.quiet:
			print("done")

	# imwrite reports some failures through its return value rather than an exception,
	# so it must be checked before claiming the file was saved
	if not cv2.imwrite(args.destination, output_image):  # save visualization to file
		raise IOError("Could not write visualization to \"{}\"".format(args.destination))

	if not args.quiet:
		print("Visualization saved to {}".format(args.destination))


# Run the command-line interface only when this file is executed as a script,
# so that importing it as a module has no side effects.
if __name__ == "__main__":
	main()