# UniFormerV2 / utils.py
# innat's picture
# Update utils.py
# 57b6d0d
import tensorflow as tf
import numpy as np
from einops import rearrange
from decord import VideoReader
# Module-level preprocessing configuration.

# NOTE(review): not read directly by the functions in this file --
# frame_sampling takes its own ``num_frames`` argument; presumably callers
# pass this value in. Confirm against the call site.
num_frames = 16
# Side length, in pixels, that frame_sampling resizes every frame to
# (it passes ``[input_size] * 2`` as the output size).
input_size = 224
# NOTE(review): not referenced by the functions in this file; presumably the
# model's patch size -- confirm before relying on it.
patch_size = (16, 16)
# Per-channel mean / std applied by format_frames after scaling pixel values
# by 1/255: frames are shifted by the mean, then divided by the std.
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_STD = np.array([0.229, 0.224, 0.225])
def format_frames(frame, output_size):
    """Resize frames and standardize them with the ImageNet channel statistics.

    Args:
        frame: Image or batch of images accepted by the ``tf.image`` ops used
            below. NOTE(review): frame_sampling passes the decoded frame
            batch here; the channel axis is presumably last so that it
            broadcasts against the 3-element mean/std -- confirm.
        output_size: Target size forwarded to ``tf.image.resize``.

    Returns:
        The resized frames scaled by 1/255, shifted by ``IMAGENET_MEAN`` and
        divided by ``IMAGENET_STD``.
    """
    as_uint8 = tf.image.convert_image_dtype(frame, tf.uint8)
    resized = tf.image.resize(as_uint8, size=output_size)
    # Bring the uint8 value range down to [0, 1] before standardizing, since
    # the mean/std constants are expressed on that scale.
    return (resized / 255. - IMAGENET_MEAN) / IMAGENET_STD
def read_video(file_path):
    """Open the video at ``file_path`` and return its decord ``VideoReader``."""
    return VideoReader(file_path)
def frame_sampling(container, num_frames):
    """Sample ``num_frames`` frames from a video and preprocess them.

    The video is split into ``num_frames`` consecutive segments of equal
    length and one frame index is drawn uniformly at random from each segment
    (using NumPy's global random state). When the video has fewer frames than
    requested, indices are instead spread evenly over the available frames,
    so some frames are repeated rather than the call failing.

    Args:
        container: Opened video supporting ``len()`` and
            ``get_batch(indices)`` whose result exposes ``asnumpy()``
            (for example the reader returned by ``read_video``).
        num_frames: Number of frames to sample.

    Returns:
        The result of ``format_frames`` applied to the sampled frames, resized
        to ``input_size`` x ``input_size``.

    Raises:
        ValueError: If ``container`` holds no frames.
    """
    total_frames = len(container)
    interval = total_frames // num_frames
    if total_frames == 0:
        raise ValueError("cannot sample frames from an empty video")

    if interval == 0:
        # Fewer frames than requested: a zero-length segment cannot be sampled
        # from (np.random.randint(0) raises), so map the requested positions
        # proportionally onto the frames that exist. Every index is
        # < total_frames and the order is non-decreasing.
        frame_index = (np.arange(num_frames) * total_frames) // num_frames
    else:
        # Start of each segment plus a random offset inside that segment.
        bids = np.arange(num_frames) * interval
        offset = np.random.randint(interval, size=bids.shape)
        frame_index = bids + offset

    # asnumpy() already yields one stacked array, so no extra np.stack needed.
    frames = container.get_batch(frame_index).asnumpy()
    return format_frames(frames, [input_size] * 2)
def denormalize(z):
    """Undo the mean/std normalization and return pixel values in [0, 255].

    Inverts the standardization performed by ``format_frames``: multiplies by
    the per-channel std, adds the per-channel mean, rescales by 255 and clips
    to the valid pixel range.

    Args:
        z: Normalized frames whose last axis broadcasts against the
            3-element channel statistics.

    Returns:
        Array of de-normalized pixel values clipped to ``[0, 255]``.
    """
    # These must mirror the statistics used for normalization
    # (IMAGENET_MEAN / IMAGENET_STD); a mismatched std skews the
    # reconstructed colours.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    pixels = ((z * std) + mean) * 255
    return np.clip(pixels, 0, 255)