File size: 6,439 Bytes
f959a75 23ffbd3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
""" Character Error Ratio (CER) metric. """
from typing import List
import datasets, evaluate , jiwer
import jiwer.transforms as tr
from datasets.config import PY_VERSION
from packaging import version
if PY_VERSION < version.parse("3.8"):
import importlib_metadata
else:
import importlib.metadata as importlib_metadata
SENTENCE_DELIMITER = ""
if version.parse(importlib_metadata.version("jiwer")) < version.parse("2.3.0"):
class SentencesToListOfCharacters(tr.AbstractTransform):
def __init__(self, sentence_delimiter: str = " "):
self.sentence_delimiter = sentence_delimiter
def process_string(self, s: str):
return list(s)
def process_list(self, inp: List[str]):
chars = []
for sent_idx, sentence in enumerate(inp):
chars.extend(self.process_string(sentence))
if self.sentence_delimiter is not None and self.sentence_delimiter != "" and sent_idx < len(inp) - 1:
chars.append(self.sentence_delimiter)
return chars
cer_transform = tr.Compose(
[tr.RemoveMultipleSpaces(), tr.Strip(), SentencesToListOfCharacters(SENTENCE_DELIMITER)]
)
else:
cer_transform = tr.Compose(
[
tr.RemoveMultipleSpaces(),
tr.Strip(),
tr.ReduceToSingleSentence(SENTENCE_DELIMITER),
tr.ReduceToListOfListOfChars(),
]
)
_CITATION = """\
@inproceedings{inproceedings,
author = {Morris, Andrew and Maier, Viktoria and Green, Phil},
year = {2004},
month = {01},
pages = {},
title = {From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition.}
}
"""
_DESCRIPTION = """\
Character error rate (CER) is a common metric of the performance of an automatic speech recognition system.
CER is similar to Word Error Rate (WER), but operates on character instead of word. Please refer to docs of WER for further information.
Character error rate can be computed as:
CER = (S + D + I) / N = (S + D + I) / (S + D + C)
where
S is the number of substitutions,
D is the number of deletions,
I is the number of insertions,
C is the number of correct characters,
N is the number of characters in the reference (N=S+D+C).
CER's output is not always a number between 0 and 1, in particular when there is a high number of insertions. This value is often associated to the percentage of characters that were incorrectly predicted. The lower the value, the better the
performance of the ASR system with a CER of 0 being a perfect score.
"""
_KWARGS_DESCRIPTION = """
Computes CER score of transcribed segments against references.
Args:
references: list of references for each speech input.
predictions: list of transcribtions to score.
concatenate_texts: Whether or not to concatenate sentences before evaluation, set to True for more accurate result.
Returns:
(float): the character error rate
Examples for Hungarain Languge:
>>> # Colab usage
>>> !pip install evaluate jiwer
>>> import evaluate
>>> from evaluate import load
>>> predictions = ["ez a jóslat", "van egy másik minta is"]
>>> references = ["ez a hivatkozás", "van még egy"]
>>> cer = evaluate.load("cer")
>>> cer_score = cer.compute(predictions=predictions, references=references)
>>> print(cer_score)
>>> 0.9615384615384616
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class CER(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Value("string", id="sequence"),
}
),
codebase_urls=["https://github.com/jitsi/jiwer/"],
reference_urls=[
"https://en.wikipedia.org/wiki/Word_error_rate",
"https://sites.google.com/site/textdigitisation/qualitymeasures/computingerrorrates",
],
)
def _compute(self, predictions, references, concatenate_texts=False):
if concatenate_texts:
return jiwer.compute_measures(
references,
predictions,
truth_transform=cer_transform,
hypothesis_transform=cer_transform,
)["wer"]
incorrect = 0
total = 0
for prediction, reference in zip(predictions, references):
measures = jiwer.compute_measures(
reference,
prediction,
truth_transform=cer_transform,
hypothesis_transform=cer_transform,
)
incorrect += measures["substitutions"] + measures["deletions"] + measures["insertions"]
total += measures["substitutions"] + measures["deletions"] + measures["hits"]
return incorrect / total
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class WER(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Value("string", id="sequence"),
}
),
codebase_urls=["https://github.com/jitsi/jiwer/"],
reference_urls=[
"https://en.wikipedia.org/wiki/Word_error_rate",
],
)
def _compute(self, predictions=None, references=None, concatenate_texts=False):
if concatenate_texts:
return compute_measures(references, predictions)["wer"]
else:
incorrect = 0
total = 0
for prediction, reference in zip(predictions, references):
measures = compute_measures(reference, prediction)
incorrect += measures["substitutions"] + measures["deletions"] + measures["insertions"]
total += measures["substitutions"] + measures["deletions"] + measures["hits"]
return incorrect / total |