Spaces:
Runtime error
Runtime error
# Copyright 2020 The HuggingFace Evaluate Authors. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
""" COMET metric. | |

Requirements:
    pip install unbabel-comet

Usage:

```python
from evaluate import load
comet_metric = load('metrics/comet/comet.py')
#comet_metric = load('comet')
#comet_metric = load('comet', 'wmt-large-hter-estimator')

source = ["Dem Feuer konnte Einhalt geboten werden", "Schulen und Kindergärten wurden eröffnet."]
hypothesis = ["The fire could be stopped", "Schools and kindergartens were open"]
reference = ["They were able to control the fire.", "Schools and kindergartens opened"]

predictions = comet_metric.compute(predictions=hypothesis, references=reference, sources=source)
predictions['scores']
```
""" | |
import comet # From: unbabel-comet | |
import datasets | |
import torch | |
import evaluate | |
# Module-level logger obtained from evaluate's logging helper.
logger = evaluate.logging.get_logger(__name__)
_CITATION = """\ | |
@inproceedings{rei-EtAl:2020:WMT, | |
author = {Rei, Ricardo and Stewart, Craig and Farinha, Ana C and Lavie, Alon}, | |
title = {Unbabel's Participation in the WMT20 Metrics Shared Task}, | |
booktitle = {Proceedings of the Fifth Conference on Machine Translation}, | |
month = {November}, | |
year = {2020}, | |
address = {Online}, | |
publisher = {Association for Computational Linguistics}, | |
pages = {909--918}, | |
} | |
@inproceedings{rei-etal-2020-comet, | |
title = "{COMET}: A Neural Framework for {MT} Evaluation", | |
author = "Rei, Ricardo and | |
Stewart, Craig and | |
Farinha, Ana C and | |
Lavie, Alon", | |
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)", | |
month = nov, | |
year = "2020", | |
address = "Online", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/2020.emnlp-main.213", | |
pages = "2685--2702", | |
} | |
""" | |
_DESCRIPTION = """\ | |
Crosslingual Optimized Metric for Evaluation of Translation (COMET) is an open-source framework used to train Machine Translation metrics that achieve high levels of correlation with different types of human judgments (HTER, DA's or MQM). | |
With the release of the framework the authors also released fully trained models that were used to compete in the WMT20 Metrics Shared Task achieving SOTA in that years competition. | |
See the [README.md] file at https://unbabel.github.io/COMET/html/models.html for more information. | |
""" | |
_KWARGS_DESCRIPTION = """ | |
COMET score. | |
Args: | |
`sources` (list of str): Source sentences | |
`predictions` (list of str): candidate translations | |
`references` (list of str): reference translations | |
`cuda` (bool): If set to True, runs COMET using GPU | |
`show_progress` (bool): Shows progress | |
`model`: COMET model to be used. Will default to `wmt-large-da-estimator-1719` if None. | |
Returns: | |
`samples`: List of dictionaries with `src`, `mt`, `ref` and `score`. | |
`scores`: List of scores. | |
Examples: | |
>>> comet_metric = evaluate.load('comet') | |
>>> # comet_metric = load('comet', 'wmt20-comet-da') # you can also choose which model to use | |
>>> source = ["Dem Feuer konnte Einhalt geboten werden", "Schulen und Kindergärten wurden eröffnet."] | |
>>> hypothesis = ["The fire could be stopped", "Schools and kindergartens were open"] | |
>>> reference = ["They were able to control the fire.", "Schools and kindergartens opened"] | |
>>> results = comet_metric.compute(predictions=hypothesis, references=reference, sources=source) | |
>>> print([round(v, 2) for v in results["scores"]]) | |
[0.19, 0.92] | |
""" | |
class COMET(evaluate.EvaluationModule):
    """`evaluate` module that scores translations with a pretrained COMET model.

    The model checkpoint is chosen by the module's config name and is loaded
    into `self.scorer` by `_download_and_prepare`; `_compute` then scores
    (source, hypothesis, reference) triples with it.
    """

    def _info(self):
        """Return the module metadata: description, citation, input schema and links."""
        return evaluate.EvaluationModuleInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            homepage="https://unbabel.github.io/COMET/html/index.html",
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "sources": datasets.Value("string", id="sequence"),
                    "predictions": datasets.Value("string", id="sequence"),
                    "references": datasets.Value("string", id="sequence"),
                }
            ),
            codebase_urls=["https://github.com/Unbabel/COMET"],
            reference_urls=[
                "https://github.com/Unbabel/COMET",
                "https://www.aclweb.org/anthology/2020.emnlp-main.213/",
                # Fixed: the URL previously ended in ".pdf6" (stray character).
                "http://www.statmt.org/wmt20/pdf/2020.wmt-1.101.pdf",
            ],
        )

    def _download_and_prepare(self, dl_manager):
        """Download the COMET checkpoint and load it into `self.scorer`.

        The "default" config name maps to the `wmt20-comet-da` model; any
        other config name is passed to `comet.download_model` as the model
        name. `dl_manager` is not used here: the download is delegated to
        `comet.download_model`.
        """
        if self.config_name == "default":
            model_name = "wmt20-comet-da"
        else:
            model_name = self.config_name
        self.scorer = comet.load_from_checkpoint(comet.download_model(model_name))

    def _compute(self, sources, predictions, references, gpus=None, progress_bar=False):
        """Score each (source, prediction, reference) triple with the loaded model.

        Args:
            sources: Source sentences.
            predictions: Candidate translations, aligned with `sources`.
            references: Reference translations, aligned with `sources`.
            gpus: Number of GPUs passed to the scorer. When None, 1 is used
                if CUDA is available and 0 otherwise.
            progress_bar: Forwarded to the scorer's `predict` call.

        Returns:
            Dict with `mean_score` and `scores` as produced by the scorer.
        """
        if gpus is None:
            gpus = 1 if torch.cuda.is_available() else 0

        # COMET expects one dict per sample with the keys "src", "mt" and "ref".
        samples = [
            {"src": src, "mt": mt, "ref": ref}
            for src, mt, ref in zip(sources, predictions, references)
        ]
        output = self.scorer.predict(samples, gpus=gpus, progress_bar=progress_bar)

        # Some scorers return a plain `(scores, mean_score)` tuple; others
        # return an object exposing `.scores` and `.system_score`. Unpacking
        # a dict-like object would yield its keys, so read the attributes
        # when they are present.
        if hasattr(output, "scores") and hasattr(output, "system_score"):
            scores, mean_score = output.scores, output.system_score
        else:
            scores, mean_score = output
        return {"mean_score": mean_score, "scores": scores}