ydshieh (HF staff) committed
Commit fbc7904 (1 parent: ce6946c)

Update README.md

Files changed (1): README.md (+30, -6)
README.md CHANGED
@@ -74,8 +74,6 @@ The model can be evaluated as follows on the zh-CN test data of Common Voice.
For the original CER calculation, refer to https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese

```python
- !mkdir cer
- !wget -O cer/cer.py https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese/raw/main/cer.py
!pip install jiwer

import torch
@@ -83,15 +81,41 @@ import torchaudio
from datasets import load_dataset, load_metric
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import re
-
+ import jiwer
+
+ def chunked_cer(targets, predictions, chunk_size=None):
+
+     _predictions = [char for seq in predictions for char in list(seq)]
+     _targets = [char for seq in targets for char in list(seq)]
+
+     if chunk_size is None: return jiwer.wer(_targets, _predictions)
+
+     start = 0
+     end = chunk_size
+     H, S, D, I = 0, 0, 0, 0
+
+     while start < len(targets):
+
+         _predictions = [char for seq in predictions[start:end] for char in list(seq)]
+         _targets = [char for seq in targets[start:end] for char in list(seq)]
+         chunk_metrics = jiwer.compute_measures(_targets, _predictions)
+         H = H + chunk_metrics["hits"]
+         S = S + chunk_metrics["substitutions"]
+         D = D + chunk_metrics["deletions"]
+         I = I + chunk_metrics["insertions"]
+         start += chunk_size
+         end += chunk_size
+
+     return float(S + D + I) / float(H + S + D)
+
test_dataset = load_dataset("common_voice", "zh-CN", split="test")
- wer = load_metric("wer")

processor = Wav2Vec2Processor.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
model.to("cuda")

- chars_to_ignore_regex = '[\,\?\.\!\-\;\:"\“\%\‘\”\�\.\⋯\!\-\:\–\。\》\,\)\,\?\;\~\~\…\︰\,\(\」\‧\《\﹔\、\—\/\,\「\﹖\·\'\×\̃\̌\ε\λ\μ\и\т\─\□\〈\〉\『\』\ア\オ\カ\チ\ド\ベ\ャ\ヤ\ン\・\丶\a\b\f\g\i\n\p\t]'
+ chars_to_ignore_regex = '[\,\?\.\!\-\;\:"\“\%\‘\”\�\.\⋯\!\-\:\–\。\》\,\)\,\?\;\~\~\…\︰\,\(\」\‧\《\﹔\、\—\/\,\「\﹖\·\×\̃\̌\ε\λ\μ\и\т\─\□\〈\〉\『\』\ア\オ\カ\チ\ド\ベ\ャ\ヤ\ン\・\丶\a\b\f\g\i\n\p\t' + "\']"
+
resampler = torchaudio.transforms.Resample(48_000, 16_000)

# Preprocessing the datasets.
@@ -118,7 +142,7 @@ def evaluate(batch):

result = test_dataset.map(evaluate, batched=True, batch_size=8)

- print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
+ print("CER: {:.2f}".format(100 * chunked_cer(predictions=result["pred_strings"], targets=result["sentence"], chunk_size=1000)))
```

**Test Result**: 43.00 %
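What the commit does: it drops the external `cer.py` download and the `load_metric("wer")` scorer in favor of an inline `chunked_cer` helper. The helper explodes each sentence into a list of single characters, so jiwer's word-level alignment effectively runs at the character level, and the closing expression implements CER = (S + D + I) / (H + S + D); scoring the test set in chunks keeps jiwer's alignment structures small. Below is a minimal usage sketch, with `chunked_cer` as defined in the diff above; the toy sentences are invented for illustration, and the expected number assumes a jiwer release (such as the 2.x line current at the time of this commit) whose `compute_measures` merges a list of sentences into one token stream:

```python
import jiwer  # required by chunked_cer, which is defined in the README above

targets     = ["大家好", "谢谢你", "早上好"]  # 9 reference characters in total
predictions = ["大家好", "谢了你", "早上好"]  # one substitution: 谢 -> 了

# chunk_size=2 splits the 3 sentences into chunks of 2 and 1;
# hits/substitutions/deletions/insertions are accumulated across chunks.
cer = chunked_cer(targets=targets, predictions=predictions, chunk_size=2)
print("CER: {:.2%}".format(cer))  # 1 substitution over 9 characters, i.e. ~11.11%
```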
 
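The other change moves the escaped apostrophe out of the long single-quoted character class and reattaches it via string concatenation (`+ "\']"`), presumably to sidestep the quote escaping inside the literal. The preprocessing step that consumes this pattern falls outside the hunks shown; the sketch below shows how such a pattern is typically applied in these XLSR evaluation READMEs. The abbreviated pattern and the sample sentence are illustrative, not taken from the commit:

```python
import re

# Abbreviated stand-in for the full pattern above: punctuation to strip,
# with the apostrophe appended through concatenation as in the commit.
chars_to_ignore_regex = '[\,\?\.\!\-\;\:"\“\”\。\,\(\)\、' + "\']"

sentence = "你好,世界。"
cleaned = re.sub(chars_to_ignore_regex, "", sentence).lower()
print(cleaned)  # 你好世界
```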