Spaces:

tobiasc
/

conex

Build error

App Files Files Community

conex / espnet2 /main_funcs /average_nbest_models.py

tobiasc

Initial commit

ad16788 over 2 years ago

raw

history blame

3.65 kB

	import logging
	from pathlib import Path
	from typing import Sequence
	from typing import Union
	import warnings

	import torch
	from typeguard import check_argument_types
	from typing import Collection

	from espnet2.train.reporter import Reporter


	@torch.no_grad()
	def average_nbest_models(
	    output_dir: Path,
	    reporter: Reporter,
	    best_model_criterion: Sequence[Sequence[str]],
	    nbest: Union[Collection[int], int],
	) -> None:
	    """Generate averaged model from n-best models

	    For every criterion known to ``reporter`` and every requested ``n``,
	    the ``n`` best epoch checkpoints (``{epoch}epoch.pth`` in
	    ``output_dir``) are averaged and written to
	    ``{phase}.{criterion}.ave_{n}best.pth``.  For ``n == 1`` no file is
	    written; a symlink to the best epoch checkpoint is created instead.
	    Finally ``{phase}.{criterion}.ave.pth`` is (re)created as a symlink to
	    the averaged model with the largest usable ``n``.

	    Args:
	        output_dir: The directory contains the model file for each epoch
	        reporter: Reporter instance
	        best_model_criterion: Give criterions to decide the best model.
	            e.g. [("valid", "loss", "min"), ("train", "acc", "max")]
	        nbest: Number of best models to average, or a collection of such
	            numbers.  Values larger than the number of available epochs
	            are ignored (falling back to 1 if none remain); 0 is skipped.
	            An empty collection triggers a warning and is treated as [1].
	    """
	    assert check_argument_types()
	    if isinstance(nbest, int):
	        nbests = [nbest]
	    else:
	        nbests = list(nbest)
	    if not nbests:
	        warnings.warn("At least 1 nbest values are required")
	        nbests = [1]

	    # 1. Get nbests: List[Tuple[str, str, List[Tuple[epoch, value]]]]
	    max_nbest = max(nbests)
	    nbest_epochs = [
	        (ph, k, reporter.sort_epochs_and_values(ph, k, m)[:max_nbest])
	        for ph, k, m in best_model_criterion
	        if reporter.has(ph, k)
	    ]

	    # Checkpoints already read from disk, keyed by epoch, so that an epoch
	    # shared by several criteria / n values is loaded only once.
	    # Entries must be treated as read-only.
	    _loaded = {}
	    for ph, cr, epoch_and_values in nbest_epochs:
	        _nbests = [i for i in nbests if i <= len(epoch_and_values)]
	        if not _nbests:
	            _nbests = [1]

	        for n in _nbests:
	            if n == 0:
	                continue
	            elif n == 1:
	                # The averaged model is same as the best model
	                e, _ = epoch_and_values[0]
	                op = output_dir / f"{e}epoch.pth"
	                sym_op = output_dir / f"{ph}.{cr}.ave_1best.pth"
	                # is_symlink() also catches a dangling link, for which
	                # exists() returns False.
	                if sym_op.is_symlink() or sym_op.exists():
	                    sym_op.unlink()
	                sym_op.symlink_to(op.name)
	            else:
	                op = output_dir / f"{ph}.{cr}.ave_{n}best.pth"
	                logging.info(
	                    f"Averaging {n}best models: " f'criterion="{ph}.{cr}": {op}'
	                )

	                avg = None
	                # 2.a. Averaging model
	                for e, _ in epoch_and_values[:n]:
	                    if e not in _loaded:
	                        _loaded[e] = torch.load(
	                            output_dir / f"{e}epoch.pth",
	                            map_location="cpu",
	                        )
	                    states = _loaded[e]

	                    if avg is None:
	                        # Shallow copy: the accumulator must not alias the
	                        # cached dict, otherwise the sums/averages written
	                        # below would overwrite the cached checkpoint and
	                        # corrupt every later average that reuses this
	                        # epoch.  Tensors themselves are never modified
	                        # in place (``+`` and ``/`` create new tensors).
	                        avg = states.copy()
	                    else:
	                        # Accumulated
	                        for k in avg:
	                            avg[k] = avg[k] + states[k]
	                for k in avg:
	                    # For int type, not averaged, but only accumulated.
	                    # e.g. BatchNorm.num_batches_tracked
	                    # (If there are any cases that requires averaging
	                    # or the other reducing method, e.g. max/min, for
	                    # integer type, please report.)
	                    if not str(avg[k].dtype).startswith("torch.int"):
	                        avg[k] = avg[k] / n

	                # 2.b. Save the ave model and create a symlink
	                torch.save(avg, op)

	        # 3. *.*.ave.pth is a symlink to the max ave model
	        op = output_dir / f"{ph}.{cr}.ave_{max(_nbests)}best.pth"
	        sym_op = output_dir / f"{ph}.{cr}.ave.pth"
	        if sym_op.is_symlink() or sym_op.exists():
	            sym_op.unlink()
	        sym_op.symlink_to(op.name)