import datasets
import evaluate
import numpy as np
# Note: lev_dist must accept arbitrary sequences (lists of token ids), not just
# strings; recent rapidfuzz-backed releases of the Levenshtein package do, while
# very old python-Levenshtein releases only handled strings.
from Levenshtein import distance as lev_dist
_DESCRIPTION = """ | |
TokenEditDistance: This is an NLP evaluation metric that records the minimum number of token edits | |
(insertions, deletions, and replacements, all weighted equally) to the prediction string in order | |
to make it exactly match the reference string. Uses identical logic to Levenshtein Edit Distance, | |
except applied to tokens (i.e. individual ints in a list) as opposed to individual characters in a string. | |
""" | |
_CITATION = "Man of a thousand and eight names" | |
_KWARGS_DESCRIPTION = """ | |
TokenEditDistance: | |
Args: | |
predictions: list of predictions to score. | |
Each prediction should be tokenized into a list of tokens. | |
references: list of references/ground truth output to score against. | |
Each reference should be tokenized into a list of tokens. | |
Returns: | |
"avg_token_edit_distance": Float, average Token Edit Distance for all inputted predictions and references | |
"token_edit_distances": List[Int], the Token Edit Distance for each inputted prediction and reference | |
Examples: | |
>>> token_edit_distance_metric = datasets.load_metric('Token Edit Distance') | |
>>> references = [[15, 4243], [100, 10008]] | |
>>> predictions = [[15, 4243], [100, 10009]] | |
>>> results = token_edit_distance_metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'avg_token_edit_distance': 0.5, 'token_edit_distances': array([0. 1.])} | |
""" | |
class TokenEditDistance(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.features.Sequence(datasets.Value("int32")),
                    "references": datasets.features.Sequence(datasets.Value("int32")),
                }
            ),
            codebase_urls=[],
            reference_urls=[],
        )
    def _compute(self, references, predictions):
        if len(predictions) != len(references):
            raise ValueError(
                "Token Edit Distance: Compute Error: "
                "Number of predictions does not match number of references."
            )
        edit_dist_arr = np.zeros(len(predictions))
        for i in range(len(edit_dist_arr)):
            if len(predictions[i]) != len(references[i]):
                raise ValueError(
                    f"Token Edit Distance: Compute Error: Prediction length does not match "
                    f"reference length for example {i} (prediction len: {len(predictions[i])}, "
                    f"reference len: {len(references[i])})."
                )
            # Levenshtein distance over token ids rather than characters.
            edit_dist_arr[i] = lev_dist(predictions[i], references[i])
        return {
            "avg_token_edit_distance": np.mean(edit_dist_arr),
            "token_edit_distances": edit_dist_arr,
        }
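# Example usage (illustrative, mirroring the doctest above): the metric class
# can be instantiated directly from this module; the expected-output comment
# assumes numpy's default array repr.
if __name__ == "__main__":
    metric = TokenEditDistance()
    results = metric.compute(
        predictions=[[15, 4243], [100, 10009]],
        references=[[15, 4243], [100, 10008]],
    )
    print(results)
    # {'avg_token_edit_distance': 0.5, 'token_edit_distances': array([0., 1.])}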