Spaces:

DAMO-NLP-SG
/

Video-LLaMA

Running on A10G

Video-LLaMA / video_llama /processors /transforms_video.py

舟勤

45d16e9 about 1 year ago

No virus

5.02 kB

	#!/usr/bin/env python3
	"""
	Copyright (c) 2022, salesforce.com, inc.
	All rights reserved.
	SPDX-License-Identifier: BSD-3-Clause
	For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
	"""


	import numbers
	import random

	from torchvision.transforms import (
	RandomCrop,
	RandomResizedCrop,
	)

	import video_llama.processors.functional_video as F


	# Public API of this module: the names exported by a star-import. Each entry
	# is one of the video transform classes defined below.
	__all__ = [
	"RandomCropVideo",
	"RandomResizedCropVideo",
	"CenterCropVideo",
	"NormalizeVideo",
	"ToTensorVideo",
	"RandomHorizontalFlipVideo",
	]


	class RandomCropVideo(RandomCrop):
	    """Crop a video clip at a randomly chosen location.

	    The crop box is sampled with the ``get_params`` helper inherited from
	    ``RandomCrop``; the crop itself is delegated to ``F.crop``.

	    Args:
	        size: desired output size. A single number is turned into a square
	            ``(int(size), int(size))``; any other value is stored unchanged.
	    """

	    def __init__(self, size):
	        # NOTE: the parent initializer is not invoked; only ``size`` is set.
	        is_scalar = isinstance(size, numbers.Number)
	        self.size = (int(size), int(size)) if is_scalar else size

	    def __call__(self, clip):
	        """
	        Args:
	            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
	        Returns:
	            torch.tensor: randomly cropped video clip.
	                size is (C, T, OH, OW)
	        """
	        top, left, height, width = self.get_params(clip, self.size)
	        return F.crop(clip, top, left, height, width)

	    def __repr__(self) -> str:
	        cls_name = type(self).__name__
	        return f"{cls_name}(size={self.size})"


	class RandomResizedCropVideo(RandomResizedCrop):
	    """Crop a random region of a video clip and resize it to a fixed size.

	    The crop box is sampled with the ``get_params`` helper inherited from
	    ``RandomResizedCrop`` using ``scale`` and ``ratio``; cropping and resizing
	    are delegated to ``F.resized_crop``.

	    Args:
	        size: target output size. A 2-element tuple or list is interpreted as
	            (height, width) and stored as a tuple; any other value is used
	            for both dimensions.
	        scale: passed unchanged to ``get_params`` on every call.
	        ratio: passed unchanged to ``get_params`` on every call.
	        interpolation_mode: forwarded to ``F.resized_crop``.

	    Raises:
	        ValueError: if ``size`` is a tuple or list whose length is not 2.
	    """

	    def __init__(
	        self,
	        size,
	        scale=(0.08, 1.0),
	        ratio=(3.0 / 4.0, 4.0 / 3.0),
	        interpolation_mode="bilinear",
	    ):
	        # NOTE: the parent initializer is not invoked; only the attributes
	        # below are set.
	        # Lists are accepted alongside tuples: previously a list such as
	        # [224, 224] fell through to the scalar branch and was silently
	        # turned into the nested ([224, 224], [224, 224]).
	        if isinstance(size, (tuple, list)):
	            if len(size) != 2:
	                raise ValueError(
	                    f"size should be tuple (height, width), instead got {size}"
	                )
	            self.size = tuple(size)
	        else:
	            self.size = (size, size)

	        self.interpolation_mode = interpolation_mode
	        self.scale = scale
	        self.ratio = ratio

	    def __call__(self, clip):
	        """
	        Args:
	            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
	        Returns:
	            torch.tensor: randomly cropped video clip, resized to
	                ``self.size`` by ``F.resized_crop``.
	        """
	        i, j, h, w = self.get_params(clip, self.scale, self.ratio)
	        return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)

	    def __repr__(self) -> str:
	        return f"{self.__class__.__name__}(size={self.size}, interpolation_mode={self.interpolation_mode}, scale={self.scale}, ratio={self.ratio})"


	class CenterCropVideo:
	    """Take the central crop of a video clip.

	    Args:
	        crop_size: desired crop size. A single number is turned into a square
	            ``(int(crop_size), int(crop_size))``; any other value is stored
	            unchanged.
	    """

	    def __init__(self, crop_size):
	        is_scalar = isinstance(crop_size, numbers.Number)
	        self.crop_size = (int(crop_size), int(crop_size)) if is_scalar else crop_size

	    def __call__(self, clip):
	        """
	        Args:
	            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
	        Returns:
	            torch.tensor: central cropping of video clip. Size is
	                (C, T, crop_size, crop_size)
	        """
	        target = self.crop_size
	        return F.center_crop(clip, target)

	    def __repr__(self) -> str:
	        cls_name = type(self).__name__
	        return f"{cls_name}(crop_size={self.crop_size})"


	class NormalizeVideo:
	    """
	    Normalize a video clip by subtracting the mean and dividing by the
	    standard deviation; the computation is delegated to ``F.normalize``.
	    Args:
	        mean (3-tuple): pixel RGB mean
	        std (3-tuple): pixel RGB standard deviation
	        inplace (boolean): whether to normalize in place
	    """

	    def __init__(self, mean, std, inplace=False):
	        self.mean, self.std, self.inplace = mean, std, inplace

	    def __call__(self, clip):
	        """
	        Args:
	            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
	        """
	        mean, std, inplace = self.mean, self.std, self.inplace
	        return F.normalize(clip, mean, std, inplace)

	    def __repr__(self) -> str:
	        cls_name = type(self).__name__
	        return f"{cls_name}(mean={self.mean}, std={self.std}, inplace={self.inplace})"


	class ToTensorVideo:
	    """
	    Convert a uint8 clip tensor to float, scale its values by 1/255.0 and
	    permute its dimensions. The conversion itself is delegated to
	    ``F.to_tensor``.
	    """

	    def __init__(self):
	        """This transform takes no configuration."""

	    def __call__(self, clip):
	        """
	        Args:
	            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
	        Return:
	            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
	        """
	        converted = F.to_tensor(clip)
	        return converted

	    def __repr__(self) -> str:
	        return type(self).__name__


	class RandomHorizontalFlipVideo:
	    """
	    Flip the video clip along the horizontal direction with a given probability
	    Args:
	        p (float): probability of the clip being flipped. Default value is 0.5
	    """

	    def __init__(self, p=0.5):
	        self.p = p

	    def __call__(self, clip):
	        """
	        Args:
	            clip (torch.tensor): Size is (C, T, H, W)
	        Return:
	            clip (torch.tensor): Size is (C, T, H, W)
	        """
	        # One uniform draw per call decides whether the clip is flipped;
	        # otherwise the input is handed back untouched.
	        should_flip = random.random() < self.p
	        return F.hflip(clip) if should_flip else clip

	    def __repr__(self) -> str:
	        cls_name = type(self).__name__
	        return f"{cls_name}(p={self.p})"