# easy-translate / eval.py
from dataset import ParallelTextReader
from torch.utils.data import DataLoader
from accelerate import find_executable_batch_size
from evaluate import load
from tqdm import tqdm
import torch
import json
import argparse
import numpy as np
import os


def get_dataloader(pred_path: str, gold_path: str, batch_size: int):
    """
    Returns a DataLoader that yields batches of (predictions, references)
    read in parallel from the two text files.
    """
    def collate_fn(batch):
        return list(map(list, zip(*batch)))
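
    # ParallelTextReader (defined in this repo's dataset.py) iterates over the
    # two files together and yields aligned (prediction, reference) pairs.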
    reader = ParallelTextReader(pred_path=pred_path, gold_path=gold_path)
    dataloader = DataLoader(
        reader, batch_size=batch_size, collate_fn=collate_fn, num_workers=0
    )
    return dataloader


def eval_files(
    pred_path: str,
    gold_path: str,
    bert_score_model: str,
    starting_batch_size: int = 128,
    output_path: str = None,
):
    """
    Evaluates a predictions file against a gold file with SacreBLEU, ROUGE,
    BLEU, METEOR, TER and BERTScore, and optionally writes the results to a
    JSON file.
    """
    if torch.cuda.is_available():
        device = "cuda:0"
        print("We will use a GPU to calculate BertScore.")
    else:
        device = "cpu"
        print(
            "We will use the CPU to calculate BertScore, which can be slow for large datasets."
        )

    dataloader = get_dataloader(pred_path, gold_path, starting_batch_size)
print("Loading sacrebleu...")
sacrebleu = load("sacrebleu")
print("Loading rouge...")
rouge = load("rouge")
print("Loading bleu...")
bleu = load("bleu")
print("Loading meteor...")
meteor = load("meteor")
print("Loading ter...")
ter = load("ter")
print("Loading BertScore...")
bert_score = load("bertscore")
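
    # Stream the two files once and feed every metric incrementally, so the
    # whole corpus never has to be held in memory at the same time.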
    with tqdm(total=len(dataloader.dataset), desc="Loading data...") as pbar:
        for predictions, references in dataloader:
            sacrebleu.add_batch(predictions=predictions, references=references)
            rouge.add_batch(predictions=predictions, references=references)
            bleu.add_batch(predictions=predictions, references=references)
            meteor.add_batch(predictions=predictions, references=references)
            ter.add_batch(predictions=predictions, references=references)
            bert_score.add_batch(predictions=predictions, references=references)
            pbar.update(len(predictions))
result_dictionary = {"path": pred_path}
print("Computing sacrebleu")
result_dictionary["sacrebleu"] = sacrebleu.compute()
print("Computing rouge score")
result_dictionary["rouge"] = rouge.compute(
use_aggregator=True, rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"]
)
print("Computing bleu score")
result_dictionary["bleu"] = bleu.compute()
print("Computing meteor score")
result_dictionary["meteor"] = meteor.compute()
print("Computing ter score")
result_dictionary["ter"] = ter.compute()
    @find_executable_batch_size(starting_batch_size=starting_batch_size)
    def inference(batch_size):
        nonlocal bert_score, bert_score_model
        print(f"Computing bert score with batch size {batch_size} on {device}")
        results = bert_score.compute(
            model_type=bert_score_model,
            batch_size=batch_size,
            device=device,
            use_fast_tokenizer=True,
        )
        results["precision"] = np.average(results["precision"])
        results["recall"] = np.average(results["recall"])
        results["f1"] = np.average(results["f1"])
        return results

    result_dictionary["bert_score"] = inference()

    if output_path is not None:
        if not os.path.exists(os.path.abspath(os.path.dirname(output_path))):
            os.makedirs(os.path.abspath(os.path.dirname(output_path)))
        with open(output_path, "w") as f:
            json.dump(result_dictionary, f, indent=4)

    print(f"Results: {json.dumps(result_dictionary, indent=4)}")

    return result_dictionary
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Run the translation evaluation experiments"
)
parser.add_argument(
"--pred_path",
type=str,
required=True,
help="Path to a txt file containing the predicted sentences.",
)
parser.add_argument(
"--gold_path",
type=str,
required=True,
help="Path to a txt file containing the gold sentences.",
)
parser.add_argument(
"--starting_batch_size",
type=int,
default=64,
help="Starting batch size for BertScore, we will automatically reduce it if we find an OOM error.",
)
parser.add_argument(
"--output_path",
type=str,
default=None,
help="Path to a json file to save the results. If not given, the results will be printed to the console.",
)
parser.add_argument(
"--bert_score_model",
type=str,
default="microsoft/deberta-xlarge-mnli",
help="Model to use for BertScore. See: https://github.com/huggingface/datasets/tree/master/metrics/bertscore"
"and https://github.com/Tiiiger/bert_score for more details.",
)
args = parser.parse_args()
eval_files(
pred_path=args.pred_path,
gold_path=args.gold_path,
starting_batch_size=args.starting_batch_size,
output_path=args.output_path,
bert_score_model=args.bert_score_model,
)
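
# A minimal example invocation (the file names below are hypothetical
# placeholders, not files shipped with the repo):
#
#   python3 eval.py \
#       --pred_path translations.en.txt \
#       --gold_path reference.en.txt \
#       --bert_score_model microsoft/deberta-xlarge-mnli \
#       --starting_batch_size 64 \
#       --output_path results.json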