submit distinct
Browse files- distinct.py +122 -31
distinct.py
CHANGED
@@ -11,46 +11,80 @@
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
-
|
15 |
|
16 |
import evaluate
|
17 |
import datasets
|
18 |
|
19 |
|
20 |
-
|
21 |
_CITATION = """\
|
22 |
-
@
|
23 |
-
title =
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
}
|
27 |
"""
|
28 |
|
29 |
-
|
30 |
_DESCRIPTION = """\
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
"""
|
33 |
|
34 |
|
35 |
-
|
36 |
_KWARGS_DESCRIPTION = """
|
37 |
Calculates how good are predictions given some references, using certain scores
|
38 |
Args:
|
39 |
-
predictions: list of
|
40 |
-
should be a string with tokens separated by spaces.
|
41 |
-
references: list of reference for each prediction. Each
|
42 |
-
reference should be a string with tokens separated by spaces.
|
43 |
Returns:
|
44 |
-
|
45 |
-
|
|
|
|
|
46 |
Examples:
|
47 |
Examples should be written in doctest format, and should illustrate how
|
48 |
to use the function.
|
49 |
|
50 |
-
>>> my_new_module = evaluate.load("
|
51 |
-
>>> results = my_new_module.compute(references=[
|
|
|
|
|
|
|
|
|
|
|
52 |
>>> print(results)
|
53 |
-
|
|
|
|
|
|
|
|
|
54 |
"""
|
55 |
|
56 |
# TODO: Define external resources urls if needed
|
@@ -59,7 +93,6 @@ BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
|
59 |
|
60 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
61 |
class distinct(evaluate.Measurement):
|
62 |
-
"""TODO: Short description of my evaluation module."""
|
63 |
|
64 |
def _info(self):
|
65 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
@@ -71,25 +104,83 @@ class distinct(evaluate.Measurement):
|
|
71 |
inputs_description=_KWARGS_DESCRIPTION,
|
72 |
# This defines the format of each prediction and reference
|
73 |
features=datasets.Features({
|
74 |
-
'predictions': datasets.
|
75 |
-
'references': datasets.Value('int64'),
|
76 |
}),
|
77 |
# Homepage of the module for documentation
|
78 |
-
homepage="
|
79 |
# Additional links to the codebase or references
|
80 |
-
codebase_urls=["
|
81 |
-
reference_urls=["
|
82 |
)
|
83 |
|
84 |
def _download_and_prepare(self, dl_manager):
|
85 |
"""Optional: download external resources useful to compute the scores"""
|
86 |
-
# TODO: Download external resources if needed
|
87 |
pass
|
88 |
|
89 |
-
def _compute(self, predictions,
|
|
|
|
|
|
|
90 |
"""Returns the scores"""
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
"
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
+
|
15 |
|
16 |
import evaluate
|
17 |
import datasets
|
18 |
|
19 |
|
20 |
+
|
21 |
_CITATION = """\
|
22 |
+
@inproceedings{liu-etal-2022-rethinking,
|
23 |
+
title = "Rethinking and Refining the Distinct Metric",
|
24 |
+
author = "Liu, Siyang and
|
25 |
+
Sabour, Sahand and
|
26 |
+
Zheng, Yinhe and
|
27 |
+
Ke, Pei and
|
28 |
+
Zhu, Xiaoyan and
|
29 |
+
Huang, Minlie",
|
30 |
+
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
|
31 |
+
year = "2022",
|
32 |
+
publisher = "Association for Computational Linguistics",
|
33 |
+
url = "https://aclanthology.org/2022.acl-short.86",
|
34 |
+
doi = "10.18653/v1/2022.acl-short.86",
|
35 |
+
}
|
36 |
+
@inproceedings{li-etal-2016-diversity,
|
37 |
+
title = "A Diversity-Promoting Objective Function for Neural Conversation Models",
|
38 |
+
author = "Li, Jiwei and
|
39 |
+
Galley, Michel and
|
40 |
+
Brockett, Chris and
|
41 |
+
Gao, Jianfeng and
|
42 |
+
Dolan, Bill",
|
43 |
+
booktitle = "Proceedings of the 2016 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies",
|
44 |
+
year = "2016",
|
45 |
+
publisher = "Association for Computational Linguistics",
|
46 |
+
url = "https://aclanthology.org/N16-1014",
|
47 |
+
doi = "10.18653/v1/N16-1014",
|
48 |
}
|
49 |
"""
|
50 |
|
51 |
+
|
52 |
_DESCRIPTION = """\
|
53 |
+
Distinct metric is to calculate corpus-level diversity of language. We provide two versions of distinct score. Expectation-Adjusted-Distinct is the default one, which removes
|
54 |
+
the biases of the original distinct score on lenthier sentences. Distinct is the original version.
|
55 |
+
|
56 |
+
For the use of Expectation-Adjusted-Distinct, vocab_size is required.
|
57 |
+
Please follow ACL paper https://aclanthology.org/2022.acl-short.86 for motivation and follow the rule of thumb provided by https://github.com/lsy641/Expectation-Adjusted-Distinct/blob/main/EAD.ipynb to determine the vocab_size
|
58 |
"""
|
59 |
|
60 |
|
61 |
+
|
62 |
_KWARGS_DESCRIPTION = """
|
63 |
Calculates how good are predictions given some references, using certain scores
|
64 |
Args:
|
65 |
+
predictions: list of sentecnes. Each prediction should be a string.
|
|
|
|
|
|
|
66 |
Returns:
|
67 |
+
Expectation-Adjusted-Distinct
|
68 |
+
Distinct-1
|
69 |
+
Distinct-2
|
70 |
+
Distinct-3
|
71 |
Examples:
|
72 |
Examples should be written in doctest format, and should illustrate how
|
73 |
to use the function.
|
74 |
|
75 |
+
>>> my_new_module = evaluate.load("distinct")
|
76 |
+
>>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], vocab_size=50257)
|
77 |
+
>>> print(results)
|
78 |
+
|
79 |
+
|
80 |
+
>>> dataset = ["This is my friend jack", "I'm sorry to hear that", "But you know I am the one who always support you", "Welcome to our family"]
|
81 |
+
>>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], dataForVocabCal = dataset)
|
82 |
>>> print(results)
|
83 |
+
|
84 |
+
|
85 |
+
>>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], mode="Distinct")
|
86 |
+
>>> print(results)
|
87 |
+
|
88 |
"""
|
89 |
|
90 |
# TODO: Define external resources urls if needed
|
|
|
93 |
|
94 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
95 |
class distinct(evaluate.Measurement):
|
|
|
96 |
|
97 |
def _info(self):
|
98 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
|
|
104 |
inputs_description=_KWARGS_DESCRIPTION,
|
105 |
# This defines the format of each prediction and reference
|
106 |
features=datasets.Features({
|
107 |
+
'predictions': datasets.Sequence('string')
|
|
|
108 |
}),
|
109 |
# Homepage of the module for documentation
|
110 |
+
homepage="https://huggingface.co/spaces/lsy641/distinct",
|
111 |
# Additional links to the codebase or references
|
112 |
+
codebase_urls=["https://github.com/lsy641/Expectation-Adjusted-Distinct/tree/main"],
|
113 |
+
reference_urls=["https://aclanthology.org/2022.acl-short.86/"]
|
114 |
)
|
115 |
|
116 |
def _download_and_prepare(self, dl_manager):
    """Optional: download external resources useful to compute the scores"""
    # Intentionally a no-op: this measurement needs no external resources
    # (the vocabulary, when needed, is supplied by the caller at compute time).
    pass
|
119 |
|
120 |
+
def _compute(self, predictions, dataForVocabCal=None, vocab_size=None, tokenizer="white_space", mode="Expectation-Adjusted-Distinct"):
|
121 |
+
|
122 |
+
from nltk.util import ngrams
|
123 |
+
|
124 |
"""Returns the scores"""
|
125 |
+
if mode == "Expectation-Adjusted-Distinct" and vocab_size is None and dataForVocabCal is None:
|
126 |
+
raise ValueError("Either vocab_size or dataForVocabCal needs to be specified when using mode 'Expectation-Adjusted-Distinct'. See https://github.com/lsy641/Expectation-Adjusted-Distinct/blob/main/EAD.ipynb for vocab_size specification. \n Or use mode='Distinct' to get original version of distinct score.")
|
127 |
+
elif mode == "Expectation-Adjusted-Distinct" and vocab_size is not None and dataForVocabCal is not None:
|
128 |
+
raise Warning("We've detected that both vocab_size and dataForVocabCal are specified. We will use dataForVocabCal.")
|
129 |
+
elif mode == "Distinct":
|
130 |
+
pass
|
131 |
+
|
132 |
+
if mode == "Expectation-Adjusted-Distinct" and dataForVocabCal is not None:
|
133 |
+
if isinstance(dataForVocabCal, list) and len(dataForVocabCal) > 0 and isinstance(dataForVocabCal[0], str):
|
134 |
+
vocab = set()
|
135 |
+
for sentence in dataForVocabCal:
|
136 |
+
if tokenizer == "white_space":
|
137 |
+
vocab = vocab | set(sentence.split(" "))
|
138 |
+
else:
|
139 |
+
vocab = vocab | set(tokenizer.tokenize(sentence))
|
140 |
+
vocab_size = len(vocab)
|
141 |
+
else:
|
142 |
+
raise TypeError("Argument dataForVocabCal should be a list of strings")
|
143 |
+
distinct_tokens = set()
|
144 |
+
distinct_tokens_2grams = set()
|
145 |
+
distinct_tokens_3grams = set()
|
146 |
+
total_tokens = []
|
147 |
+
total_tokens_2grams = []
|
148 |
+
total_tokens_3grams = []
|
149 |
+
for prediction in predictions:
|
150 |
+
if tokenizer == "white_space":
|
151 |
+
tokens = prediction.split(" ")
|
152 |
+
tokens_2grams = ngrams(prediction.split(" "), 2, left_pad_symbol='<s>')
|
153 |
+
tokens_3grams = ngrams(prediction.split(" "), 3, left_pad_symbol='<s>')
|
154 |
+
else:
|
155 |
+
try:
|
156 |
+
tokens = list(tokenizer.tokenize(prediction))
|
157 |
+
tokens_2grams = ngrams(list(tokenizer.tokenize(prediction)), 2, left_pad_symbol='<s>')
|
158 |
+
tokens_3grams = ngrams(list(tokenizer.tokenize(prediction)), 3, left_pad_symbol='<s>')
|
159 |
+
except Exception as e:
|
160 |
+
raise e
|
161 |
+
|
162 |
+
distinct_tokens = distinct_tokens | set(tokens)
|
163 |
+
distinct_tokens_2grams = distinct_tokens_2grams | set(tokens_2grams)
|
164 |
+
distinct_tokens_3grams = distinct_tokens_3grams | set(tokens_3grams)
|
165 |
+
total_tokens.extend(tokens)
|
166 |
+
total_tokens_2grams.extend(list(tokens_2grams))
|
167 |
+
total_tokens_3grams.extend(list(tokens_3grams))
|
168 |
+
|
169 |
+
Distinct_1 = len(distinct_tokens)/len(total_tokens)
|
170 |
+
Distinct_2 = len(distinct_tokens_2grams)/len(total_tokens_2grams)
|
171 |
+
Distinct_3 = len(distinct_tokens_3grams)/len(total_tokens_3grams)
|
172 |
+
if mode == "Expectation-Adjusted-Distinct":
|
173 |
+
Expectation_Adjusted_Distinct = len(distinct_tokens)/(vocab_size*(1-((vocab_size-1)/vocab_size)**len(total_tokens)))
|
174 |
+
return {
|
175 |
+
"Expectation-Adjusted-Distinct": Expectation_Adjusted_Distinct
|
176 |
+
"Distinct-1": Distinct_1,
|
177 |
+
"Distinct-2": Distinct_2,
|
178 |
+
"Distinct-3": Distinct_3
|
179 |
+
}
|
180 |
+
|
181 |
+
if mode == "Distinct":
|
182 |
+
return {
|
183 |
+
"Distinct-1": Distinct_1,
|
184 |
+
"Distinct-2": Distinct_2,
|
185 |
+
"Distinct-3": Distinct_3
|
186 |
+
}
|