lvwerra HF staff committed on
Commit
cc5e258
1 Parent(s): 86af275

Update Space (evaluate main: dfdd0cc0)

Browse files
Files changed (1) hide show
  1. bleu.py +19 -7
bleu.py CHANGED
@@ -57,7 +57,7 @@ _KWARGS_DESCRIPTION = """
57
  Computes BLEU score of translated segments against one or more references.
58
  Args:
59
  predictions: list of translations to score.
60
- references: list of lists of references for each translation.
61
  tokenizer : approach used for tokenizing `predictions` and `references`.
62
  The default tokenizer is `tokenizer_13a`, a minimal tokenization approach that is equivalent to `mteval-v13a`, used by WMT.
63
  This can be replaced by any function that takes a string as input and returns a list of tokens as output.
@@ -91,12 +91,20 @@ class Bleu(evaluate.EvaluationModule):
91
  description=_DESCRIPTION,
92
  citation=_CITATION,
93
  inputs_description=_KWARGS_DESCRIPTION,
94
- features=datasets.Features(
95
- {
96
- "predictions": datasets.Value("string", id="sequence"),
97
- "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
98
- }
99
- ),
 
 
 
 
 
 
 
 
100
  codebase_urls=["https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py"],
101
  reference_urls=[
102
  "https://en.wikipedia.org/wiki/BLEU",
@@ -105,6 +113,10 @@ class Bleu(evaluate.EvaluationModule):
105
  )
106
 
107
  def _compute(self, predictions, references, tokenizer=Tokenizer13a(), max_order=4, smooth=False):
 
 
 
 
108
  references = [[tokenizer(r) for r in ref] for ref in references]
109
  predictions = [tokenizer(p) for p in predictions]
110
  score = compute_bleu(
 
57
  Computes BLEU score of translated segments against one or more references.
58
  Args:
59
  predictions: list of translations to score.
60
+ references: list of lists of references (or a single list of references) for each translation.
61
  tokenizer : approach used for tokenizing `predictions` and `references`.
62
  The default tokenizer is `tokenizer_13a`, a minimal tokenization approach that is equivalent to `mteval-v13a`, used by WMT.
63
  This can be replaced by any function that takes a string as input and returns a list of tokens as output.
 
91
  description=_DESCRIPTION,
92
  citation=_CITATION,
93
  inputs_description=_KWARGS_DESCRIPTION,
94
+ features=[
95
+ datasets.Features(
96
+ {
97
+ "predictions": datasets.Value("string", id="sequence"),
98
+ "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
99
+ }
100
+ ),
101
+ datasets.Features(
102
+ {
103
+ "predictions": datasets.Value("string", id="sequence"),
104
+ "references": datasets.Value("string", id="sequence"),
105
+ }
106
+ ),
107
+ ],
108
  codebase_urls=["https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py"],
109
  reference_urls=[
110
  "https://en.wikipedia.org/wiki/BLEU",
 
113
  )
114
 
115
  def _compute(self, predictions, references, tokenizer=Tokenizer13a(), max_order=4, smooth=False):
116
+ # if only one reference is provided make sure we still use list of lists
117
+ if isinstance(references[0], str):
118
+ references = [[ref] for ref in references]
119
+
120
  references = [[tokenizer(r) for r in ref] for ref in references]
121
  predictions = [tokenizer(p) for p in predictions]
122
  score = compute_bleu(