# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import json
import math
import numpy as np
import os
from datetime import datetime
import psutil
import torch
from fvcore.common.file_io import PathManager
from fvcore.nn.activation_count import activation_count
from fvcore.nn.flop_count import flop_count
from matplotlib import pyplot as plt
from torch import nn
import timesformer.utils.logging as logging
import timesformer.utils.multiprocessing as mpu
from timesformer.datasets.utils import pack_pathway_output
from timesformer.models.batchnorm_helper import SubBatchNorm3d
logger = logging.get_logger(__name__)
def check_nan_losses(loss):
"""
Determine whether the loss is NaN (not a number).
Args:
        loss (float): the loss value to check for NaN.
"""
if math.isnan(loss):
raise RuntimeError("ERROR: Got NaN losses {}".format(datetime.now()))
def params_count(model, ignore_bn=False):
"""
Compute the number of parameters.
    Args:
        model (model): model to count the number of parameters.
        ignore_bn (bool): if True, exclude nn.BatchNorm3d parameters
            from the count.
    """
if not ignore_bn:
return np.sum([p.numel() for p in model.parameters()]).item()
else:
count = 0
for m in model.modules():
if not isinstance(m, nn.BatchNorm3d):
for p in m.parameters(recurse=False):
count += p.numel()
return count
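# A minimal sketch (illustrative only, not used by the training code) of how
# ignore_bn changes the count: nn.BatchNorm3d affine weights and biases are
# skipped, while running stats are buffers and are never counted either way.
def _example_params_count():
    # Conv3d(3, 8, 3): 8 * 3 * 27 weights + 8 biases = 656 parameters.
    # BatchNorm3d(8): 8 weights + 8 biases = 16 parameters.
    m = nn.Sequential(nn.Conv3d(3, 8, kernel_size=3), nn.BatchNorm3d(8))
    assert params_count(m) == 672
    assert params_count(m, ignore_bn=True) == 656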
def gpu_mem_usage():
"""
Compute the GPU memory usage for the current device (GB).
"""
if torch.cuda.is_available():
mem_usage_bytes = torch.cuda.max_memory_allocated()
else:
mem_usage_bytes = 0
return mem_usage_bytes / 1024 ** 3
def cpu_mem_usage():
"""
    Compute the system memory (RAM) usage for the current machine (GB).
Returns:
usage (float): used memory (GB).
total (float): total memory (GB).
"""
vram = psutil.virtual_memory()
usage = (vram.total - vram.available) / 1024 ** 3
total = vram.total / 1024 ** 3
return usage, total
def _get_model_analysis_input(cfg, use_train_input):
"""
Return a dummy input for model analysis with batch size 1. The input is
    used for analyzing the model (counting flops, activations, etc.).
Args:
cfg (CfgNode): configs. Details can be found in
lib/config/defaults.py
use_train_input (bool): if True, return the input for training. Otherwise,
return the input for testing.
Returns:
inputs: the input for model analysis.
"""
rgb_dimension = 3
if use_train_input:
input_tensors = torch.rand(
rgb_dimension,
cfg.DATA.NUM_FRAMES,
cfg.DATA.TRAIN_CROP_SIZE,
cfg.DATA.TRAIN_CROP_SIZE,
)
else:
input_tensors = torch.rand(
rgb_dimension,
cfg.DATA.NUM_FRAMES,
cfg.DATA.TEST_CROP_SIZE,
cfg.DATA.TEST_CROP_SIZE,
)
    if cfg.MODEL.ARCH not in ["resformer", "vit"]:
model_inputs = pack_pathway_output(cfg, input_tensors)
for i in range(len(model_inputs)):
model_inputs[i] = model_inputs[i].unsqueeze(0)
if cfg.NUM_GPUS:
model_inputs[i] = model_inputs[i].cuda(non_blocking=True)
else:
        model_inputs = input_tensors.unsqueeze(0)
        if cfg.NUM_GPUS:
            model_inputs = model_inputs.cuda(non_blocking=True)
# If detection is enabled, count flops for one proposal.
if cfg.DETECTION.ENABLE:
bbox = torch.tensor([[0, 0, 1.0, 0, 1.0]])
if cfg.NUM_GPUS:
bbox = bbox.cuda()
inputs = (model_inputs, bbox)
else:
inputs = (model_inputs,)
return inputs
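# Shape sketch (hypothetical config values, for illustration): for a 'vit'
# architecture with DATA.NUM_FRAMES=8 and DATA.TRAIN_CROP_SIZE=224, the
# analysis input is a tuple holding one tensor of shape (1, 3, 8, 224, 224);
# multi-pathway architectures instead get a list with one such tensor per
# pathway from pack_pathway_output.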
def get_model_stats(model, cfg, mode, use_train_input):
"""
Compute statistics for the current model given the config.
Args:
model (model): model to perform analysis.
cfg (CfgNode): configs. Details can be found in
lib/config/defaults.py
        mode (str): Options include `flop` or `activation`. Compute either
            the flop count (gflops) or the activation count (mega-activations).
use_train_input (bool): if True, compute statistics for training. Otherwise,
compute statistics for testing.
Returns:
        float: the total count (gflops or mega-activations) of the given model.
"""
assert mode in [
"flop",
"activation",
], "'{}' not supported for model analysis".format(mode)
if mode == "flop":
model_stats_fun = flop_count
elif mode == "activation":
model_stats_fun = activation_count
# Set model to evaluation mode for analysis.
# Evaluation mode can avoid getting stuck with sync batchnorm.
model_mode = model.training
model.eval()
inputs = _get_model_analysis_input(cfg, use_train_input)
count_dict, *_ = model_stats_fun(model, inputs)
count = sum(count_dict.values())
model.train(model_mode)
return count
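# A minimal usage sketch (hypothetical helper; assumes a built model and a
# loaded cfg). fvcore's flop_count reports gigaflops and activation_count
# reports mega-activations, so the returned values need no further scaling.
def _example_model_stats(model, cfg):
    gflops = get_model_stats(model, cfg, "flop", use_train_input=True)
    mega_acts = get_model_stats(model, cfg, "activation", use_train_input=True)
    return gflops, mega_acts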
def log_model_info(model, cfg, use_train_input=True):
"""
    Log model info, including the number of parameters, GPU memory usage,
    gflops, and activation count.
The model info is computed when the model is in validation mode.
Args:
model (model): model to log the info.
cfg (CfgNode): configs. Details can be found in
lib/config/defaults.py
use_train_input (bool): if True, log info for training. Otherwise,
log info for testing.
"""
logger.info("Model:\n{}".format(model))
logger.info("Params: {:,}".format(params_count(model)))
logger.info("Mem: {:,} MB".format(gpu_mem_usage()))
logger.info(
"Flops: {:,} G".format(
get_model_stats(model, cfg, "flop", use_train_input)
)
)
logger.info(
"Activations: {:,} M".format(
get_model_stats(model, cfg, "activation", use_train_input)
)
)
logger.info("nvidia-smi")
os.system("nvidia-smi")
def is_eval_epoch(cfg, cur_epoch, multigrid_schedule):
"""
Determine if the model should be evaluated at the current epoch.
Args:
cfg (CfgNode): configs. Details can be found in
lib/config/defaults.py
cur_epoch (int): current epoch.
multigrid_schedule (List): schedule for multigrid training.
"""
if cur_epoch + 1 == cfg.SOLVER.MAX_EPOCH:
return True
if multigrid_schedule is not None:
prev_epoch = 0
for s in multigrid_schedule:
if cur_epoch < s[-1]:
period = max(
(s[-1] - prev_epoch) // cfg.MULTIGRID.EVAL_FREQ + 1, 1
)
return (s[-1] - 1 - cur_epoch) % period == 0
prev_epoch = s[-1]
return (cur_epoch + 1) % cfg.TRAIN.EVAL_PERIOD == 0
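# Worked example (hypothetical schedule, illustrative only): with a single
# multigrid stage ending at epoch 30 and cfg.MULTIGRID.EVAL_FREQ = 3, the
# period is max((30 - 0) // 3 + 1, 1) = 11, so evaluation runs whenever
# (30 - 1 - cur_epoch) % 11 == 0, i.e. at cur_epoch 7, 18, and 29.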
def plot_input(tensor, bboxes=(), texts=(), path="./tmp_vis.png"):
"""
Plot the input tensor with the optional bounding box and save it to disk.
Args:
tensor (tensor): a tensor with shape of `NxCxHxW`.
        bboxes (tuple): bounding boxes with format of [[x1, y1, x2, y2]].
texts (tuple): a tuple of string to plot.
path (str): path to the image to save to.
"""
tensor = tensor.float()
tensor = tensor - tensor.min()
tensor = tensor / tensor.max()
    f, ax = plt.subplots(
        nrows=1, ncols=tensor.shape[0], figsize=(50, 20), squeeze=False
    )
    # Index the single row so ax[i] works even when tensor.shape[0] == 1.
    ax = ax[0]
for i in range(tensor.shape[0]):
ax[i].axis("off")
ax[i].imshow(tensor[i].permute(1, 2, 0))
if bboxes is not None and len(bboxes) > i:
for box in bboxes[i]:
x1, y1, x2, y2 = box
ax[i].vlines(x1, y1, y2, colors="g", linestyles="solid")
ax[i].vlines(x2, y1, y2, colors="g", linestyles="solid")
ax[i].hlines(y1, x1, x2, colors="g", linestyles="solid")
ax[i].hlines(y2, x1, x2, colors="g", linestyles="solid")
if texts is not None and len(texts) > i:
ax[i].text(0, 0, texts[i])
f.savefig(path)
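# A minimal usage sketch (hypothetical tensor, illustrative values): each of
# the N frames gets its own subplot, and boxes are drawn as (x1, y1, x2, y2).
#   frames = torch.rand(4, 3, 224, 224)  # N x C x H x W
#   plot_input(frames, bboxes=([[10, 10, 100, 100]],), texts=("frame 0",))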
def frozen_bn_stats(model):
"""
Set all the bn layers to eval mode.
Args:
model (model): model to set bn layers to eval mode.
"""
for m in model.modules():
if isinstance(m, nn.BatchNorm3d):
m.eval()
def aggregate_sub_bn_stats(module):
"""
Recursively find all SubBN modules and aggregate sub-BN stats.
Args:
module (nn.Module)
Returns:
        count (int): number of SubBN modules found.
"""
count = 0
for child in module.children():
if isinstance(child, SubBatchNorm3d):
child.aggregate_stats()
count += 1
else:
count += aggregate_sub_bn_stats(child)
return count
def launch_job(cfg, init_method, func, daemon=False):
"""
    Run 'func' on one or more GPUs, as specified in cfg.
Args:
cfg (CfgNode): configs. Details can be found in
lib/config/defaults.py
init_method (str): initialization method to launch the job with multiple
devices.
func (function): job to run on GPU(s)
        daemon (bool): the spawned processes' daemon flag. If set to True,
            daemonic processes will be created.
"""
if cfg.NUM_GPUS > 1:
torch.multiprocessing.spawn(
mpu.run,
nprocs=cfg.NUM_GPUS,
args=(
cfg.NUM_GPUS,
func,
init_method,
cfg.SHARD_ID,
cfg.NUM_SHARDS,
cfg.DIST_BACKEND,
cfg,
),
daemon=daemon,
)
else:
func(cfg=cfg)
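# A minimal usage sketch (hypothetical entry point; `train` stands in for any
# function that accepts a `cfg` keyword argument):
#   launch_job(cfg, init_method="tcp://localhost:9999", func=train)
# With cfg.NUM_GPUS > 1 this spawns one process per GPU through
# torch.multiprocessing.spawn; otherwise func runs in the current process.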
def get_class_names(path, parent_path=None, subset_path=None):
"""
Read json file with entries {classname: index} and return
an array of class names in order.
If parent_path is provided, load and map all children to their ids.
Args:
path (str): path to class ids json file.
File must be in the format {"class1": id1, "class2": id2, ...}
parent_path (Optional[str]): path to parent-child json file.
File must be in the format {"parent1": ["child1", "child2", ...], ...}
subset_path (Optional[str]): path to text file containing a subset
of class names, separated by newline characters.
Returns:
class_names (list of strs): list of class names.
        class_parent (dict): a dictionary where the key is a parent class name
            and the value is the list of ids of its children classes.
subset_ids (list of ints): list of ids of the classes provided in the
subset file.
"""
try:
with PathManager.open(path, "r") as f:
class2idx = json.load(f)
except Exception as err:
print("Fail to load file from {} with error {}".format(path, err))
return
max_key = max(class2idx.values())
class_names = [None] * (max_key + 1)
for k, i in class2idx.items():
class_names[i] = k
class_parent = None
if parent_path is not None and parent_path != "":
try:
with PathManager.open(parent_path, "r") as f:
d_parent = json.load(f)
except EnvironmentError as err:
            logger.error(
                "Failed to load file from {} with error {}".format(
                    parent_path, err
                )
            )
return
class_parent = {}
for parent, children in d_parent.items():
indices = [
class2idx[c] for c in children if class2idx.get(c) is not None
]
class_parent[parent] = indices
subset_ids = None
if subset_path is not None and subset_path != "":
try:
with PathManager.open(subset_path, "r") as f:
subset = f.read().split("\n")
subset_ids = [
class2idx[name]
for name in subset
if class2idx.get(name) is not None
]
except EnvironmentError as err:
            logger.error(
                "Failed to load file from {} with error {}".format(
                    subset_path, err
                )
            )
return
return class_names, class_parent, subset_ids
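# A minimal sketch of the expected file formats (hypothetical contents):
#   class ids json:    {"cat": 0, "dog": 1}
#   parent-child json: {"animal": ["cat", "dog"]}
#   subset text file:  one class name per line, e.g. "cat"
# get_class_names would then return (["cat", "dog"], {"animal": [0, 1]}, [0]).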