SummerTime / tests /evaluation_test.py
aliabd
full demo working with old gradio
7e3e85d
import unittest
from typing import Tuple, List, Dict
from evaluation import SUPPORTED_EVALUATION_METRICS
from helpers import print_with_color
class TestEvaluationMetrics(unittest.TestCase):
def get_summary_pairs(self, size: int = 1) -> Tuple[List[str]]:
test_output = (
[
"""
Glowing letters that had been hanging above
the Yankee stadium from 1976 to 2008 were placed for auction at
Sotheby’s on Wednesday, but were not sold, The current owner
of the sign is Reggie Jackson, a Yankee hall-of-famer."""
]
* size
)
test_target = (
[
"""
An auction for the lights from Yankee Stadium failed to
produce any bids on Wednesday at Sotheby’s. The lights,
currently owned by former Yankees player Reggie Jackson,
lit the stadium from 1976 until 2008."""
]
* size
)
return test_output, test_target
def test_evaluate(self):
print_with_color(f"{'#'*10} Testing all evaluation metrics... {'#'*10}\n", "35")
num_eval_metrics = 0
for metric_class in SUPPORTED_EVALUATION_METRICS:
# if metric_class in [Rouge, RougeWe]:
# # TODO: Temporarily skipping Rouge/RougeWE metrics to avoid local bug.
# continue
print_with_color(f"Testing {metric_class.metric_name}...", "35")
metric = metric_class()
test_output, test_target = self.get_summary_pairs()
score_dict = metric.evaluate(test_output, test_target)
print(f"{metric_class} output dictionary")
print(score_dict)
self.assertTrue(isinstance(score_dict, Dict))
self.assertNotEqual(score_dict, {})
for k, v in score_dict.items():
self.assertTrue(isinstance(k, str) and isinstance(v, float))
# # TODO: add metric score range assertions
# self.assertTrue(self.range[0] <= score_dict[k])
# self.assertTrue(score_dict[k] <= self.range[1])
print_with_color(f"{metric_class.metric_name} test complete\n", "32")
num_eval_metrics += 1
print_with_color(
f"{'#'*10} Evaluation metrics test complete ({num_eval_metrics} metrics) {'#'*10}",
"32",
)
if __name__ == "__main__":
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()