kwargs aren't passed down when using evaluate.combine()

#2
by alvations - opened

Using the default .load() function works:

import evaluate
bertscore = evaluate.load("bertscore")
predictions = ["hello there", "general kenobi"]
references = ["hello there", "general kenobi"]
results = bertscore.compute(predictions=predictions, references=references, lang="en")
results

[out]:

{'precision': [1.000000238418579, 0.9999999403953552],
 'recall': [1.000000238418579, 0.9999999403953552],
 'f1': [1.000000238418579, 0.9999999403953552],
 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.20.1)'}

But when using .combine(), e.g.

import evaluate
bertscore = evaluate.combine(["bertscore"])
predictions = ["hello there", "general kenobi"]
references = ["hello there", "general kenobi"]
results = bertscore.compute(predictions=predictions, references=references, lang="en")
results

it throws the error below; it looks like the kwargs aren't passed down:

[out]:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_27/994019662.py in <module>
      3 predictions = ["hello there", "general kenobi"]
      4 references = ["hello there", "general kenobi"]
----> 5 results = bertscore.compute(predictions=predictions, references=references, lang="en")
      6 results

/opt/conda/lib/python3.7/site-packages/evaluate/module.py in compute(self, predictions, references, **kwargs)
    860             batch = {"predictions": predictions, "references": references, **kwargs}
    861             batch = {input_name: batch[input_name] for input_name in evaluation_module._feature_names()}
--> 862             results.append(evaluation_module.compute(**batch))
    863 
    864         return self._merge_results(results)

/opt/conda/lib/python3.7/site-packages/evaluate/module.py in compute(self, predictions, references, **kwargs)
    442             inputs = {input_name: self.data[input_name] for input_name in self._feature_names()}
    443             with temp_seed(self.seed):
--> 444                 output = self._compute(**inputs, **compute_kwargs)
    445 
    446             if self.buf_writer is not None:

~/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bertscore/cf4907b18f8f741f202232c0f8009a3bd49ff98802c245abcb6ea51a37a8c05b/bertscore.py in _compute(self, predictions, references, lang, model_type, num_layers, verbose, idf, device, batch_size, nthreads, all_layers, rescale_with_baseline, baseline_path, use_fast_tokenizer)
    169             if lang is None:
    170                 raise ValueError(
--> 171                     "Either 'lang' (e.g. 'en') or 'model_type' (e.g. 'microsoft/deberta-xlarge-mnli')"
    172                     " must be specified"
    173                 )

ValueError: Either 'lang' (e.g. 'en') or 'model_type' (e.g. 'microsoft/deberta-xlarge-mnli') must be specified

Sign up or log in to comment