import os
import sys
from pathlib import Path

import cv2
import mediapy
import numpy as np
from frame_interpolation.eval import interpolator, util
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
from moviepy.editor import CompositeVideoClip
from moviepy.editor import VideoFileClip as vfc
from PIL import Image


# Pick the contour indices at which a frame should be written to the output video
def list_of_positions(num_contours, num_frames=100):
    positions = []
    for i in range(0, num_frames):
        positions.append(int(num_contours / num_frames * i))
    return positions
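# For example (illustrative values): list_of_positions(10, num_frames=5) returns
# [0, 2, 4, 6, 8] -- roughly evenly spaced contour indices at which a frame is kept.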


def contourfinder(image1, image2, text=None, num_frames=100, output_dir=Path("temp")):
    # Create two blank canvases (same shape as the inputs) to draw contours into
    blank = np.zeros(np.shape(image1), dtype="uint8")
    blank2 = np.zeros(np.shape(image2), dtype="uint8")
    # Canny edge maps and contours for image 1 and image 2
    threshold = cv2.Canny(image=image1, threshold1=100, threshold2=200)
    contours, hierarchies = cv2.findContours(
        threshold, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )

    threshold2 = cv2.Canny(image=image2, threshold1=100, threshold2=200)
    contours2, hierarchies2 = cv2.findContours(
        threshold2, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )

    # Initialize three video writers.
    # Note: cv2.VideoWriter expects the frame size as (width, height),
    # while numpy shapes are (height, width).
    frame_size = (threshold.shape[1], threshold.shape[0])
    vid1 = cv2.VideoWriter(
        Path(output_dir / "vid1.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        24,
        frame_size,
    )
    vid2 = cv2.VideoWriter(
        Path(output_dir / "vid2.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        24,
        frame_size,
    )
    text_vid = cv2.VideoWriter(
        Path(output_dir / "text_video.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        10,
        frame_size,
    )

    # Pick which contour indices get written as frames
    positions = list_of_positions(len(contours))
    frames = []

    # Loop over contours adding them to blank image then writing to video
    for i in range(0, len(contours)):
        cv2.drawContours(
            blank, contours=contours, contourIdx=i, color=(125, 200, 255), thickness=1
        )

        if i in positions:
            frames.append(blank)
            # Compile into a video
            vid1.write(blank)

    vid1.release()
    clip1 = vfc(Path(output_dir / "vid1.mp4").as_posix())
    positions = list_of_positions(len(contours2))

    for i in range(0, len(contours2)):
        cv2.drawContours(
            blank2, contours=contours2, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            frames.append(blank2)

            vid2.write(blank2)

    vid2.release()
    clip3 = vfc(Path(output_dir / "vid2.mp4").as_posix())

    # Next is the text video

    if text is not None:
        # Blank canvas (same shape as image1) to draw the text onto
        image = np.zeros(np.shape(image1), dtype="uint8")

        # cv2.putText parameters
        font = cv2.FONT_HERSHEY_COMPLEX
        org = (10, 400)  # bottom-left corner of the first character
        fontScale = 3
        color = (186, 184, 108)  # text colour in BGR
        thickness = 4  # line thickness in px

        def text_frames(text, image, org):
            spacing = 55  # horizontal spacing between letters, in px
            blink = image.copy()  # keep a text-free copy for the blink effect
            cv2.imwrite(Path(output_dir / "blink.png").as_posix(), blink)
            for i in range(0, len(text)):
                # Write the blank frame first so the text appears to blink in
                text_vid.write(blink)

                # Draw the next character
                image = cv2.putText(
                    image, text[i], org, font, fontScale, color, thickness, cv2.LINE_AA
                )

                # Advance the origin; uppercase letters get a little extra spacing
                org = (org[0] + spacing, org[1])
                if text[i].isupper():
                    org = (org[0] + spacing + 1, org[1])
                    print(f"Upper {text[i]}")
                print(org)

                # Save the intermediate frame and append it to the text video
                cv2.imwrite(Path(output_dir / f"text_im{i}.png").as_posix(), image)
                text_vid.write(image)
            text_vid.release()

        text_frames(text, image, org)
    return clip1, clip3


def load_model(model_name):
    model = interpolator.Interpolator(snapshot_download(repo_id=model_name), None)
    return model


model_names = [
    "akhaliq/frame-interpolation-film-style",
    "NimaBoscarino/frame-interpolation_film_l1",
    "NimaBoscarino/frame_interpolation_film_vgg",
]

models = {model_name: load_model(model_name) for model_name in model_names}

ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)


def resize(width, img):
    basewidth = width
    img = Image.open(img)
    wpercent = basewidth / float(img.size[0])
    hsize = int(float(img.size[1]) * float(wpercent))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
    img = img.resize((basewidth, hsize), Image.LANCZOS)
    return img


def resize_img(img1, img2, output_dir):
    img_target_size = Image.open(img1)
    img_to_resize = resize_and_crop(
        img2,
        (
            img_target_size.size[0],
            img_target_size.size[1],
        ),  # set width and height to match cv2_images[0]
        crop_origin="middle",
    )
    img_to_resize.save(Path(output_dir / "resized_img2.png"))


def get_video_frames(
    images, vid_output_dir=Path("temp"), times_to_interpolate=6, model_name_index=0
):
    frame1 = images[0]
    frame2 = images[1]

    model = models[model_names[model_name_index]]
    cv2_images = [cv2.imread(frame1), cv2.imread(frame2)]

    frame1 = resize(256, frame1)
    frame2 = resize(256, frame2)
    test_1 = Path(vid_output_dir / "test1.png")
    test_2 = Path(vid_output_dir / "test2.png")
    frame1.save(test_1)
    frame2.save(test_2)

    resize_img(test_1, test_2, vid_output_dir)
    input_frames = [
        Path(vid_output_dir / "test1.png").as_posix(),
        Path(vid_output_dir / "resized_img2.png").as_posix(),
    ]

    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, times_to_interpolate, model
        )
    )
    return frames, cv2_images


def create_mp4_with_audio(frames, cv2_images, duration, audio, output_path):
    vid_output_dir = output_path.parent
    temp_vid_path = Path(vid_output_dir / "TEMP.mp4")
    mediapy.write_video(temp_vid_path, frames, fps=5)
    print(
        f"TYPES....{type(cv2_images[0])}, {type(cv2_images[1])} SHAPES {cv2_images[0].shape}"
    )
    clip1, clip3 = contourfinder(
        cv2_images[0], cv2_images[1], output_dir=vid_output_dir
    )  # contourfinder also accepts an optional `text` argument

    # Stitch with moviepy: contour video 1 -> interpolated TEMP.mp4 -> contour video 2
    clip2 = (
        vfc(temp_vid_path.as_posix())
        .resize(8)
        .set_start(clip1.duration - 0.5)
        .crossfadein(2)
    )
    clip3 = clip3.set_start((clip1.duration - 0.5) + clip2.duration).crossfadein(2)

    new_clip = CompositeVideoClip([clip1, clip2, clip3])
    # Naively attach the audio without matching its length to the video;
    # it has worked so far, so it is left as-is.
    new_clip.audio = audio
    new_clip = new_clip.set_duration(duration)
    new_clip.write_videofile(output_path.as_posix(), audio_codec="aac")
    return output_path
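

# A minimal usage sketch (illustrative only -- the file names, the output
# directory, and the audio clip below are assumptions, not part of this module):
#
#   from moviepy.editor import AudioFileClip
#
#   out_dir = Path("temp")
#   out_dir.mkdir(exist_ok=True)
#   frames, cv2_images = get_video_frames(
#       ["img1.png", "img2.png"], vid_output_dir=out_dir, times_to_interpolate=6
#   )
#   audio = AudioFileClip("audio.mp3")
#   create_mp4_with_audio(
#       frames, cv2_images, audio.duration, audio, out_dir / "final.mp4"
#   )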