# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TODO: Add a description here.""" | |
from typing import final | |
import evaluate | |
import datasets | |
_CITATION = """\
@inproceedings{deng2021compression,
  title={Compression, Transduction, and Creation: A Unified Framework for Evaluating Natural Language Generation},
  author={Deng, Mingkai and Tan, Bowen and Liu, Zhengzhong and Xing, Eric and Hu, Zhiting},
  booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  pages={7580--7605},
  year={2021}
}
"""
_DESCRIPTION = """\
CTC is an automatic evaluation metric for natural language generation, described in the paper
"Compression, Transduction, and Creation: A Unified Framework for Evaluating Natural Language Generation"
(Deng et al., EMNLP 2021).
"""
_KWARGS_DESCRIPTION = """
Computes the CTC score of predictions given references.
Args:
    predictions: list of texts (hypotheses) to score. Currently only a single-element list is supported.
    references: list of texts (premises) to score against. Currently only a single-element list is supported.
Returns:
    ctc_score: the CTC score of the prediction/reference pair.
Examples:
    >>> ctc_score = evaluate.load("yzha/ctc_eval")
    >>> results = ctc_score.compute(references=['hello world'], predictions=['hi world'])
    >>> print(results)
    {'ctc_score': 0.5211202502250671}
"""
class CTC_Eval(evaluate.EvaluationModule):
    """Evaluation module for the CTC metric (Deng et al., 2021)."""
    def _info(self):
        # Module metadata; the description appears on the module's hub page.
        return evaluate.EvaluationModuleInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Expected format of each prediction and reference.
            features=datasets.Features({
                'predictions': datasets.Value('large_string'),
                'references': datasets.Value('large_string'),
            }),
            # Homepage of the module for documentation.
            homepage="https://github.com/tanyuqian/ctc-gen-eval",
            # Additional links to the codebase and references.
            codebase_urls=["https://github.com/tanyuqian/ctc-gen-eval"],
            reference_urls=["https://github.com/tanyuqian/ctc-gen-eval"],
        )
    def _download_and_prepare(self, dl_manager):
        """Download external resources needed to compute the scores."""
        import nltk
        nltk.download('stopwords')

        import subprocess
        import sys

        def install(package):
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])

        # Install the ctc-score package on first use if it is missing,
        # then re-import so the scorer classes are actually bound.
        try:
            from ctc_score import StyleTransferScorer, SummarizationScorer, DialogScorer
        except ImportError:
            print('ctc_score package is not installed. Installing...')
            install('ctc-score')
            from ctc_score import StyleTransferScorer, SummarizationScorer, DialogScorer

        # The config name encodes "<align_model>,<aspect>".
        if self.config_name == 'default':
            self.config_name = 'D-cnndm,consistency'
        model_name, self.aspect = self.config_name.split(',')

        # Pick the task-specific scorer that supports the requested aspect.
        if self.aspect in ['consistency', 'relevance']:
            self.scorer = SummarizationScorer(align=model_name, device='cpu')
        elif self.aspect in ['preservation']:
            self.scorer = StyleTransferScorer(align=model_name)
        elif self.aspect in ['engagingness', 'groundedness']:
            self.scorer = DialogScorer(align=model_name)
        else:
            raise ValueError(f"Unsupported aspect: {self.aspect}")
    def _compute(self, predictions, references):
        """Returns the CTC score for the given prediction/reference pair."""
        assert len(predictions) == len(references), \
            "predictions and references must have the same length"
        # Only a single prediction/reference pair is supported at the moment.
        ctc_score = self.scorer.score(doc=references[0], refs=[], hypo=predictions[0], aspect=self.aspect)
        return {
            "ctc_score": ctc_score,
        }
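

# A minimal local smoke test (a sketch, not part of the module loaded from the
# hub). It assumes this file is saved as "ctc_eval.py" so evaluate can load it
# as a local module script.
if __name__ == "__main__":
    metric = evaluate.load("ctc_eval.py", "D-cnndm,consistency")
    print(metric.compute(references=['hello world'], predictions=['hi world']))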