Spaces:

infocusp
/

PoseSync-Video-Matching-Tool

Runtime error

File size: 10,274 Bytes

ec24258

import os

import cv2
import numpy as np
from moviepy.editor import ImageSequenceClip


def get_video_frames(video_path):
    """Reads a video frame by frame

    Args:
      video_path: A video path

    Returns:
      A list of numpy arrays representing video frames
    """

    vid_cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = vid_cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
        else:
            break

    return frames


class Plot:
    """Class for plotting keypoints on video frames and creating a video

    """
    KEYPOINT_EDGE_INDS_TO_COLOR = {
        (0, 1): "m",
        (0, 2): "c",
        (1, 3): "m",
        (2, 4): "c",
        (0, 5): "m",
        (0, 6): "c",
        (5, 7): "m",
        (7, 9): "m",
        (6, 8): "c",
        (8, 10): "c",
        (5, 6): "y",
        (5, 11): "m",
        (6, 12): "c",
        (11, 12): "y",
        (11, 13): "m",
        (13, 15): "m",
        (12, 14): "c",
        (14, 16): "c",
        }

    @staticmethod
    def add_keypoints_to_image(
        images_array, keypoints_list
        ):
        """
        Adds keypoints to the image

        Args:
          images_array: list of images to represent the keypoints
          keypoints_list : list of keypoints
          model: name of the model used to detect the keypoints
        Returns:
          None
        """

        output_overlay_array = images_array.astype(np.int32)
        output_overlay_list = Plot.draw_prediction_on_image(
            output_overlay_array, keypoints_list
            )
        return np.array(output_overlay_list)

    @staticmethod
    def draw_prediction_on_image(
        image_list, keypoints_list
        ):

        """Draws the keypoint predictions on image.

        Args:
          image_list: A numpy array with shape [n, height, width, channel]
          representing the
            pixel values of the input image where n is number of images.
          keypoints_list: A numpy array with shape [n, 17, 2] representing the
            coordinates of 17 keypoints where n is number of images.

        Returns:
          A numpy array with shape [n, out_height, out_width, channel]
          representing
          the list of
          images overlaid with keypoint predictions.
        """
        height, width, channel = image_list[0].shape

        keypoint_locs, keypoint_edges = Plot._keypoints_and_edges_for_display(
            keypoints_list,
            height, width
            )
        for img_i in range(keypoint_locs.shape[0]):
            for edge in keypoint_edges[img_i]:
                image = cv2.line(
                    image_list[img_i], (int(edge[0]), int(edge[1])),
                    (int(edge[2]), int(edge[3])), color=(0, 0, 255), thickness=3
                    )

            for center_x, center_y in keypoint_locs[img_i]:
                image = cv2.circle(
                    image_list[img_i], (int(center_x), int(center_y)), radius=5,
                    color=(255, 0, 0), thickness=-1
                    )

            image_list[img_i] = image

        return image_list

    @staticmethod
    def _keypoints_and_edges_for_display(
        keypoints_list, height, width,
        ):
        """Returns high confidence keypoints and edges for visualization.

        Args:
          keypoints_list: A numpy array with shape [1, 1, 17, 3] representing
          the keypoint coordinates and scores returned from the MoveNet model.
          height: height of the image in pixels.
          width: width of the image in pixels.
          keypoint_threshold: minimum confidence score for keypoint to be
          visualized.

        Returns:
          A (kpts_absolute_xy, edges_xy) containing:
            * array with shape [n, 17, 2] representing the coordinates of all
            keypoints of all detected entities in n images;
            * array with shape [n, 18, 4] representing the coordinates of all
            skeleton edges of all detected entities in n images;
        """
        kpts_x = width * keypoints_list[:, :, 0]
        kpts_y = height * keypoints_list[:, :, 1]

        edge_pair = np.array(list(Plot.KEYPOINT_EDGE_INDS_TO_COLOR.keys()))
        kpts_absolute_xy = np.stack(
            [kpts_x, kpts_y], axis=-1
            )

        x_start = kpts_x[:, edge_pair[:, 0]]
        y_start = kpts_y[:, edge_pair[:, 0]]
        x_end = kpts_x[:, edge_pair[:, 1]]
        y_end = kpts_y[:, edge_pair[:, 1]]

        edges = np.stack([x_start, y_start, x_end, y_end], axis=2)
        return kpts_absolute_xy, edges

    @staticmethod
    def resize_and_concat(ref_image_list, test_image_list):
        """Resizes either of reference frames list or test frames list to
        make both list of equal shape and merges both frames side by side

        Args:
          ref_image_list: A list of numpy array representing reference video
          frames
          test_image_list: A list of numpy array representing test video frames

        Returns:
          concat_img_list: A list of numpy array representing merged video
          frames

        """

        def pad_image(image_list, pad_axis, pad_len, odd_len_diff):
            """pads given number of pixels to image_list on given axis

            Args:
              image_list: A list of numpy array representing video frames
              pad_axis: A list of numpy array representing test video frames
              pad_len: number of pixels to pad on either side of frame
              odd_len_diff: 1 if difference between reference and test is odd
              else 0

            Returns:
              padded video frame

            """
            if pad_axis == 0:
                return np.pad(
                    image_list, (
                        (0, 0), (pad_len, pad_len + odd_len_diff), (0, 0),
                        (0, 0)), 'constant',
                    constant_values=(0)
                    )
            elif pad_axis == 1:
                return np.pad(
                    image_list, (
                        (0, 0), (0, 0), (pad_len, pad_len + odd_len_diff),
                        (0, 0)), 'constant',
                    constant_values=(0)
                    )

        ref_height, ref_width, _ = ref_image_list[0].shape
        test_height, test_width, _ = test_image_list[0].shape

        pad_height = abs(test_height - ref_height) // 2
        odd_height_diff = (test_height - ref_height) % 2

        if ref_height < test_height:
            ref_image_list = pad_image(
                ref_image_list, 0, pad_height, odd_height_diff
                )
        elif ref_height > test_height:
            test_image_list = pad_image(
                test_image_list, 0, pad_height, odd_height_diff
                )

        pad_width = abs(test_width - ref_width) // 2
        odd_width_diff = (test_width - ref_width) % 2

        if ref_width < test_width:
            ref_image_list = pad_image(
                ref_image_list, 1, pad_width, odd_width_diff
                )
        elif ref_width > test_width:
            test_image_list = pad_image(
                test_image_list, 1, pad_width, odd_width_diff
                )

        concat_img_list = np.concatenate(
            (ref_image_list, test_image_list), axis=2
            )
        return concat_img_list

    @staticmethod
    def overlay_score_on_images(image_list, scores):
        """writes score on given image list

        Args:
          image_list: A list of numpy array representing video frames
          scores: A list of score between reference and test keypoints

        Returns:
          A list of numpy array with score overlayed on it

        """
        for i in range(len(image_list)):
            image = image_list[i, :, :, :]

            txt = f"Score : {scores[i]}"

            image = cv2.putText(
                image, txt, (5, 10), cv2.FONT_HERSHEY_SIMPLEX,
                0.4, (255, 0, 0), 1, cv2.LINE_AA
                )

            image_list[i, :, :, :] = image

        return image_list

    def plot_matching(
        ref_frames, test_frames, ref_keypoints, test_keypoints, ref_frames_idx,
        test_frames_idx, costs, output_path
        ):
        """creates a video of reference and test video frames with keypoints
        overlayed on them

        Args:
          ref_frames: A list of numpy array representing reference video frames
          test_frames: A list of numpy array representing test video frames
          ref_keypoints: A list of numpy array representing reference video
          keypoints
          test_keypoints: A list of numpy array representing reference video
          keypoints
          ref_frames_idx: A list of reference frame indices
          test_frames_idx: A list of test frame indices
          costs: A list of score between reference and test keypoints
          output_path: path at which output video to be stored

        """
        if (ref_frames_idx is not None) and (
            test_frames_idx is not None) and len(ref_frames_idx) == len(
            test_frames_idx
            ):

            ref_frames = ref_frames[ref_frames_idx]
            test_frames = test_frames[test_frames_idx]

            ref_keypoints = ref_keypoints[ref_frames_idx]
            test_keypoints = test_keypoints[test_frames_idx]

        if costs is None:
            costs = ['N/A'] * len(ref_frames)

        display_line = [
            f"Cost: {costs[i]}     Ref frame: {ref_frames_idx[i]}    Test " \
            f"frame: {test_frames_idx[i]}"
            for i in range(len(ref_frames_idx))]

        ref_image_list = Plot.add_keypoints_to_image(
            ref_frames, ref_keypoints
            )
        test_image_list = Plot.add_keypoints_to_image(
            test_frames, test_keypoints
            )

        comparison_img_list = Plot.resize_and_concat(
            ref_image_list, test_image_list
            )

        comparison_img_list = Plot.overlay_score_on_images(
            comparison_img_list, display_line
            )

        video = ImageSequenceClip(list(comparison_img_list), fps=5)
        video.write_videofile(output_path, fps=5)