from pathlib import Path

import cv2
import mediapy
import numpy as np
from frame_interpolation.eval import interpolator, util
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
from moviepy.editor import CompositeVideoClip
from moviepy.editor import VideoFileClip as vfc
from PIL import Image


# Get the key positions at which a frame needs to be captured
def list_of_positions(num_contours, num_frames=100):
    positions = []
    for i in range(num_frames):
        positions.append(int(num_contours / num_frames * i))
    return positions


def contourfinder(image1, image2, text=None, num_frames=100, output_dir=Path("temp")):
    # Create two blank canvases, sized to match the input images, to draw contours into
    blank = np.zeros(np.shape(image1), dtype="uint8")
    blank2 = np.zeros(np.shape(image2), dtype="uint8")

    # Edge detection and contours for images 1 and 2
    threshold = cv2.Canny(image=image1, threshold1=100, threshold2=200)
    contours, hierarchies = cv2.findContours(
        threshold, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    threshold2 = cv2.Canny(image=image2, threshold1=100, threshold2=200)
    contours2, hierarchies2 = cv2.findContours(
        threshold2, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )

    # Initialize three empty videos. VideoWriter expects (width, height),
    # while numpy shapes are (height, width), so swap the axes.
    frame_size = (threshold.shape[1], threshold.shape[0])
    vid1 = cv2.VideoWriter(
        Path(output_dir / "vid1.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        24,
        frame_size,
    )
    vid2 = cv2.VideoWriter(
        Path(output_dir / "vid2.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        24,
        frame_size,
    )
    text_vid = cv2.VideoWriter(
        Path(output_dir / "text_video.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        10,
        frame_size,
    )

    # Get positions
    positions = list_of_positions(len(contours))
    frames = []

    # Loop over contours, drawing each onto the canvas, then writing to video
    for i in range(len(contours)):
        cv2.drawContours(
            blank, contours=contours, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            # Copy so the snapshot isn't mutated by later drawContours calls
            frames.append(blank.copy())
            # Compile to video
            vid1.write(blank)
    vid1.release()

    clip1 = vfc(Path(output_dir / "vid1.mp4").as_posix())

    positions = list_of_positions(len(contours2))
    for i in range(len(contours2)):
        cv2.drawContours(
            blank2, contours=contours2, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            frames.append(blank2.copy())
            vid2.write(blank2)
    vid2.release()

    clip3 = vfc(Path(output_dir / "vid2.mp4").as_posix())

    # Next is the text video
    if text is not None:
        # Blank canvas matching the first input image
        # (the original referenced an undefined variable "original" here)
        image = np.zeros(image1.shape, dtype="uint8")

        # Text rendering settings
        font = cv2.FONT_HERSHEY_COMPLEX
        org = (10, 400)  # bottom-left corner of the text
        fontScale = 3
        color = (186, 184, 108)  # BGR
        thickness = 4  # line thickness in px

        def text_frames(text, image, org):
            spacing = 55  # spacing between letters
            blink = image
            cv2.imwrite(Path(output_dir / "blink.png").as_posix(), blink)
            # range(len(text) - 1) silently dropped the last character
            for i in range(len(text)):
                text_vid.write(blink)
                # Draw the next letter onto the running image
                image = cv2.putText(
                    image, text[i], org, font, fontScale, color, thickness, cv2.LINE_AA
                )

                # Advance the origin, with extra spacing after uppercase letters
                org = (org[0] + spacing, org[1])
                if text[i].isupper():
                    org = (org[0] + spacing + 1, org[1])
                    print(f"Upper {text[i]}")
                    print(org)

                # Save the frame (".as_posix" was missing its call parentheses)
                cv2.imwrite(Path(output_dir / f"text_im{i}.png").as_posix(), image)

                # Compile to video
                text_vid.write(image)
            text_vid.release()

        text_frames(text, image, org)

    return clip1, clip3


def load_model(model_name):
    model = interpolator.Interpolator(snapshot_download(repo_id=model_name), None)
    return model


model_names = [
    "akhaliq/frame-interpolation-film-style",
    "NimaBoscarino/frame-interpolation_film_l1",
    "NimaBoscarino/frame_interpolation_film_vgg",
]

models = {model_name: load_model(model_name) for model_name in model_names}

ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)


def resize(width, img):
    basewidth = width
    img = Image.open(img)
    wpercent = basewidth / float(img.size[0])
    hsize = int(float(img.size[1]) * wpercent)
    # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10
    img = img.resize((basewidth, hsize), Image.LANCZOS)
    return img


def resize_img(img1, img2, output_dir):
    img_target_size = Image.open(img1)
    img_to_resize = resize_and_crop(
        img2,
        (
            img_target_size.size[0],
            img_target_size.size[1],
        ),  # set width and height to match cv2_images[0]
        crop_origin="middle",
    )
    img_to_resize.save(Path(output_dir / "resized_img2.png"))


def get_video_frames(
    images, vid_output_dir=Path("temp"), times_to_interpolate=6, model_name_index=0
):
    # vid_output_dir must be a Path for the "/" joins below
    # (the original default was the string "temp", which would crash)
    frame1, frame2 = images[0], images[1]
    model = models[model_names[model_name_index]]
    cv2_images = [cv2.imread(frame1), cv2.imread(frame2)]

    frame1 = resize(256, frame1)
    frame2 = resize(256, frame2)

    test_1 = Path(vid_output_dir / "test1.png")
    test_2 = Path(vid_output_dir / "test2.png")
    frame1.save(test_1)
    frame2.save(test_2)

    resize_img(test_1, test_2, vid_output_dir)

    input_frames = [
        Path(vid_output_dir / "test1.png").as_posix(),
        Path(vid_output_dir / "resized_img2.png").as_posix(),
    ]
    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, times_to_interpolate, model
        )
    )
    return frames, cv2_images


def create_mp4_with_audio(frames, cv2_images, duration, audio, output_path):
    vid_output_dir = output_path.parent
    temp_vid_path = Path(vid_output_dir / "TEMP.mp4")
    mediapy.write_video(temp_vid_path, frames, fps=5)
    print(
        f"TYPES....{type(cv2_images[0])},{type(cv2_images[1])} SHAPES{cv2_images[0].shape} Img {cv2_images[0]}"
    )

    # contourfinder also takes an optional third text argument
    clip1, clip3 = contourfinder(
        cv2_images[0], cv2_images[1], output_dir=vid_output_dir
    )

    # Use OpenCV and moviepy together:
    # sequence OpenCV video 1 -> TEMP.mp4 -> OpenCV video 2, with crossfades
    clip2 = (
        vfc(temp_vid_path.as_posix())
        .resize(2)
        .set_start(clip1.duration - 0.5)
        .crossfadein(2)
    )
    clip3 = clip3.set_start((clip1.duration - 0.5) + clip2.duration).crossfadein(2)
    new_clip = CompositeVideoClip([clip1, clip2, clip3])
    # Naively attach the audio without considering the length of the video;
    # that could be a problem, but it works, so it stays
    new_clip.audio = audio
    # set_duration returns a new clip rather than mutating in place
    new_clip = new_clip.set_duration(duration)
    new_clip.write_videofile(output_path.as_posix(), audio_codec="aac")

    return output_path.as_posix()