|
""" |
|
MIT License |
|
|
|
Copyright (c) 2021 Wilson Yan |
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy |
|
of this software and associated documentation files (the "Software"), to deal |
|
in the Software without restriction, including without limitation the rights |
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
copies of the Software, and to permit persons to whom the Software is |
|
furnished to do so, subject to the following conditions: |
|
|
|
The above copyright notice and this permission notice shall be included in all |
|
copies or substantial portions of the Software. |
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
SOFTWARE. |
|
|
|
|
|
This file is copied from https://github.com/wilson1yan/VideoGPT/blob/master/videogpt/utils.py |
|
We adapted it to Hugging Face AutoModel for easier model loading. |
|
""" |
|
|
|
|
|
|
|
|
|
def shift_dim(x, src_dim=-1, dest_dim=-1, make_contiguous=True):
    """Move one dimension of ``x`` to a new position.

    The remaining dimensions keep their relative order.

    Args:
        x: Object exposing ``shape``, ``permute`` and ``contiguous``
            (e.g. a ``torch.Tensor``).
        src_dim: Index of the dimension to move; negative values count
            from the end.
        dest_dim: Index the moved dimension occupies in the result;
            negative values count from the end.
        make_contiguous: When true, ``contiguous()`` is called on the
            permuted result before returning it.

    Returns:
        The result of ``x.permute(...)``, optionally made contiguous.

    Raises:
        AssertionError: If ``src_dim`` or ``dest_dim`` is out of range.
    """
    n_dims = len(x.shape)
    if src_dim < 0:
        src_dim += n_dims
    if dest_dim < 0:
        dest_dim += n_dims

    assert 0 <= src_dim < n_dims and 0 <= dest_dim < n_dims

    # Pull the source axis out of the identity ordering and re-insert it at
    # the destination slot; every other axis keeps its relative order.
    permutation = list(range(n_dims))
    permutation.insert(dest_dim, permutation.pop(src_dim))

    x = x.permute(permutation)
    if make_contiguous:
        x = x.contiguous()
    return x
|
|
|
|
|
|
|
|
|
|
|
|
|
def view_range(x, i, j, shape):
    """Re-view dimensions ``i`` (inclusive) to ``j`` (exclusive) of ``x``.

    The dimensions in ``[i, j)`` are replaced by ``shape``; dimensions
    before ``i`` and from ``j`` onward are left as they are.

    Args:
        x: Object exposing ``shape`` and ``view`` (e.g. a ``torch.Tensor``).
        i: First dimension of the range; negative values count from the end.
        j: End of the range (exclusive); ``None`` means "through the last
            dimension", negative values count from the end.
        shape: Iterable of sizes that replaces the selected dimensions.

    Returns:
        ``x.view(...)`` with the new overall shape.

    Raises:
        AssertionError: If the resolved range is empty or out of bounds.
    """
    replacement = tuple(shape)

    current = x.shape
    rank = len(current)

    start = rank + i if i < 0 else i
    if j is None:
        stop = rank
    else:
        stop = rank + j if j < 0 else j

    assert 0 <= start < stop <= rank

    return x.view(current[:start] + replacement + current[stop:])
|
|
|
|
|
def tensor_slice(x, begin, size):
    """Slice a block out of ``x`` given per-dimension offsets and extents.

    Args:
        x: Object exposing ``shape`` and slice indexing (e.g. a
            ``torch.Tensor`` or ``numpy.ndarray``).
        begin: Per-dimension start offsets; each must be non-negative.
        size: Per-dimension extents. ``-1`` means "from ``begin`` through
            the end of that dimension".

    Returns:
        ``x`` indexed with ``slice(b, b + s)`` along each listed dimension.

    Raises:
        AssertionError: If any offset is negative, or any extent is
            negative after ``-1`` has been resolved.
    """
    assert all(b >= 0 for b in begin)
    size = [dim_len - b if s == -1 else s
            for s, b, dim_len in zip(size, begin, x.shape)]
    assert all(s >= 0 for s in size)

    # Index with a tuple: a list of slices is a non-tuple sequence index,
    # which NumPy no longer accepts for multidimensional indexing.
    slices = tuple(slice(b, b + s) for b, s in zip(begin, size))
    return x[slices]
|
|
|
|
|
import math |
|
import numpy as np |
|
import skvideo.io |
|
def save_video_grid(video, fname, nrow=None):
    """Tile a batch of videos into a single grid video and write it out.

    Args:
        video: Tensor whose shape unpacks as
            ``(batch, channels, frames, height, width)``. Values are scaled
            by 255 and cast to ``uint8`` -- presumably floats in ``[0, 1]``
            (TODO confirm with callers); out-of-range values are clipped
            instead of wrapping around.
        fname: Output path handed to ``skvideo.io.vwrite``.
        nrow: Number of rows in the grid. Defaults to
            ``ceil(sqrt(batch))``; the column count is derived from it.
    """
    b, c, t, h, w = video.shape
    # Channels-last layout: (batch, frames, height, width, channels).
    video = video.permute(0, 2, 3, 4, 1)
    video = np.clip(video.cpu().numpy() * 255, 0, 255).astype('uint8')

    if nrow is None:
        nrow = math.ceil(math.sqrt(b))
    ncol = math.ceil(b / nrow)
    padding = 1
    video_grid = np.zeros((t, (padding + h) * nrow + padding,
                           (padding + w) * ncol + padding, c), dtype='uint8')
    for i in range(b):
        # Distinct names so the channel count ``c`` is not clobbered.
        row, col = divmod(i, ncol)

        # Offset by ``padding`` so the border reserved in the canvas size
        # surrounds the tiles evenly instead of piling up bottom/right.
        start_r = padding + (padding + h) * row
        start_c = padding + (padding + w) * col
        video_grid[:, start_r:start_r + h, start_c:start_c + w] = video[i]

    skvideo.io.vwrite(fname, video_grid, inputdict={'-r': '5'})
    print('saved videos to', fname)
|
|
|
|