# AskAnything / util.py
# (repository page header: ynhe's picture, commit "[Init]", 5423e78)
from decord import VideoReader
from decord import cpu
import numpy as np
import torchvision.transforms as transforms
from transforms import (
GroupNormalize, GroupScale, GroupCenterCrop,
Stack, ToTorchFormatTensor
)
def loadvideo_decord(sample, sample_rate_scale=1, new_width=384, new_height=384, clip_len=8, frame_sample_rate=2, num_segment=1):
    """Decode a sparse, evenly spaced set of frames from a video file.

    The video is split into ``num_segment`` equal-length segments. Within
    each segment, roughly one frame per second of the *whole* video (but
    never fewer than 8) is picked at evenly spaced positions. The combined
    index list is then thinned by keeping every ``sample_rate_scale``-th
    entry.

    Args:
        sample: Path (or other source accepted by ``VideoReader``) of the
            video to read.
        sample_rate_scale: Stride applied to the final index list. Values
            that truncate to 0 are treated as 1 (no thinning).
        new_width: Target frame width handed to ``VideoReader``.
        new_height: Target frame height handed to ``VideoReader``.
        clip_len: Unused; kept only so existing callers keep working.
        frame_sample_rate: Unused; kept only so existing callers keep working.
        num_segment: Number of temporal segments to sample from. Must be
            at least 1.

    Returns:
        The selected frames as a NumPy array (``get_batch(...).asnumpy()``).
        Presumably laid out as (num_frames, height, width, channels) --
        TODO confirm against the decord version in use.
    """
    vr = VideoReader(sample, width=new_width, height=new_height,
                     num_threads=1, ctx=cpu(0))

    total_frames = len(vr)
    seg_len = total_frames // num_segment

    # Approximate duration in whole seconds. Some containers report an
    # average FPS of 0 (or NaN); fall back to the 8-frame minimum instead
    # of failing on the division.
    avg_fps = vr.get_avg_fps()
    seconds = total_frames // avg_fps if avg_fps > 0 else 0
    frames_per_segment = int(max(seconds, 8))

    # The in-segment offsets are identical for every segment, so compute
    # them once. linspace includes the end point ``seg_len``, which is one
    # past the segment's last frame, hence the clip.
    offsets = np.linspace(0, seg_len, num=frames_per_segment)
    offsets = np.clip(offsets, 0, seg_len - 1).astype(np.int64)

    all_index = []
    for segment in range(num_segment):
        all_index.extend((offsets + segment * seg_len).tolist())

    # A zero step would raise ValueError in the slice below.
    step = int(sample_rate_scale) or 1
    all_index = all_index[::step]

    vr.seek(0)
    return vr.get_batch(all_index).asnumpy()
def loadvideo_decord_origin(sample, sample_rate_scale=1, new_width=384, new_height=384, clip_len=8, frame_sample_rate=2, num_segment=1):
    """Decode a sparse, evenly spaced set of frames without passing a size.

    Frame selection works as follows: the video is split into
    ``num_segment`` equal-length segments. Within each segment, roughly one
    frame per second of the *whole* video (but never fewer than 8) is picked
    at evenly spaced positions. The combined index list is then thinned by
    keeping every ``sample_rate_scale``-th entry. No width/height is handed
    to ``VideoReader`` here.

    Args:
        sample: Path (or other source accepted by ``VideoReader``) of the
            video to read.
        sample_rate_scale: Stride applied to the final index list. Values
            that truncate to 0 are treated as 1 (no thinning).
        new_width: Unused; kept only so existing callers keep working.
        new_height: Unused; kept only so existing callers keep working.
        clip_len: Unused; kept only so existing callers keep working.
        frame_sample_rate: Unused; kept only so existing callers keep working.
        num_segment: Number of temporal segments to sample from. Must be
            at least 1.

    Returns:
        The selected frames as a NumPy array (``get_batch(...).asnumpy()``).
        Presumably laid out as (num_frames, height, width, channels) --
        TODO confirm against the decord version in use.
    """
    vr = VideoReader(sample, num_threads=1, ctx=cpu(0))

    total_frames = len(vr)
    seg_len = total_frames // num_segment

    # Approximate duration in whole seconds. Some containers report an
    # average FPS of 0 (or NaN); fall back to the 8-frame minimum instead
    # of failing on the division.
    avg_fps = vr.get_avg_fps()
    seconds = total_frames // avg_fps if avg_fps > 0 else 0
    frames_per_segment = int(max(seconds, 8))

    # The in-segment offsets are identical for every segment, so compute
    # them once. linspace includes the end point ``seg_len``, which is one
    # past the segment's last frame, hence the clip.
    offsets = np.linspace(0, seg_len, num=frames_per_segment)
    offsets = np.clip(offsets, 0, seg_len - 1).astype(np.int64)

    all_index = []
    for segment in range(num_segment):
        all_index.extend((offsets + segment * seg_len).tolist())

    # A zero step would raise ValueError in the slice below.
    step = int(sample_rate_scale) or 1
    all_index = all_index[::step]

    vr.seek(0)
    return vr.get_batch(all_index).asnumpy()