Update Space (evaluate main: dfdd0cc0)
bleu.py CHANGED
@@ -57,7 +57,7 @@ _KWARGS_DESCRIPTION = """
 Computes BLEU score of translated segments against one or more references.
 Args:
     predictions: list of translations to score.
-    references: list of lists of references for each translation.
+    references: list of lists of or just a list of references for each translation.
     tokenizer : approach used for tokenizing `predictions` and `references`.
         The default tokenizer is `tokenizer_13a`, a minimal tokenization approach that is equivalent to `mteval-v13a`, used by WMT.
         This can be replaced by any function that takes a string as input and returns a list of tokens as output.
@@ -91,12 +91,20 @@ class Bleu(evaluate.EvaluationModule):
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
-            features=datasets.Features(
-                {
-                    "predictions": datasets.Value("string", id="sequence"),
-                    "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
-                }
-            ),
+            features=[
+                datasets.Features(
+                    {
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
+                    }
+                ),
+                datasets.Features(
+                    {
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Value("string", id="sequence"),
+                    }
+                ),
+            ],
             codebase_urls=["https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py"],
             reference_urls=[
                 "https://en.wikipedia.org/wiki/BLEU",
@@ -105,6 +113,10 @@ class Bleu(evaluate.EvaluationModule):
         )
 
     def _compute(self, predictions, references, tokenizer=Tokenizer13a(), max_order=4, smooth=False):
+        # if only one reference is provided make sure we still use list of lists
+        if isinstance(references[0], str):
+            references = [[ref] for ref in references]
+
         references = [[tokenizer(r) for r in ref] for ref in references]
         predictions = [tokenizer(p) for p in predictions]
         score = compute_bleu(
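The practical effect of the change: the metric now declares two accepted feature schemas, and `_compute` normalizes a flat list of reference strings into the list-of-lists form before tokenizing, so both calling conventions score identically. A minimal usage sketch, assuming the module is loaded through the evaluate library's standard `evaluate.load` interface (the prediction and reference strings here are illustrative, not from the commit):

import evaluate

bleu = evaluate.load("bleu")

predictions = ["hello there general kenobi", "foo bar foobar"]

# Original format: one list of reference strings per prediction.
multi_ref = [
    ["hello there general kenobi", "hello there !"],
    ["foo bar foobar"],
]
print(bleu.compute(predictions=predictions, references=multi_ref))

# Newly supported format: a single reference string per prediction.
# _compute wraps each string as [ref], so this is equivalent to passing
# [["hello there general kenobi"], ["foo bar foobar"]].
single_ref = ["hello there general kenobi", "foo bar foobar"]
print(bleu.compute(predictions=predictions, references=single_ref))

Note that the `isinstance(references[0], str)` check inspects only the first element, so a reference list that mixes strings and lists would not be fully normalized; callers are expected to use one format consistently.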