marcel committed on
Commit
7e77e3d
1 Parent(s): d97c9d7

eval code was not working

Browse files
Files changed (1) hide show
  1. README.md +36 -36
README.md CHANGED
@@ -43,7 +43,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
43
  test_dataset = load_dataset("common_voice", "de", split="test[:2%]")
44
 
45
  processor = Wav2Vec2Processor.from_pretrained("marcel/wav2vec2-large-xlsr-german-demo")
46
- model = Wav2Vec2ForCTC.from_pretrained("marcel/wav2vec2-large-xlsr-german-demo")
47
 
48
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
49
 
@@ -79,61 +79,61 @@ from datasets import load_dataset, load_metric
79
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
80
  import re
81
 
82
- test_dataset = load_dataset("common_voice", "de", split="test")
83
  wer = load_metric("wer")
84
 
85
- processor = Wav2Vec2Processor.from_pretrained('marcel/wav2vec2-large-xlsr-53-german-demo')
86
- model = Wav2Vec2ForCTC.from_pretrained('marcel/wav2vec2-large-xlsr-53-german-demo')
87
  model.to("cuda")
88
 
89
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\”\�\カ\æ\無\ན\カ\臣\ѹ\…\«\»\ð\ı\„\幺\א\ב\比\ш\ע\)\ứ\в\œ\ч\+\—\ш\‚\נ\м\ń\乡\$\=\ש\ф\支\(\°\и\к\̇]'
90
  substitutions = {
91
- 'e' : '[\ə\é\ě\ę\ê\ế\ế\ë\ė\е]',
92
- 'o' : '[\ō\ô\ô\ó\ò\ø\ọ\ŏ\õ\ő\о]',
93
- 'a' : '[\á\ā\ā\ă\ã\å\â\à\ą\а]',
94
- 'c' : '[\č\ć\ç\с]',
95
- 'l' : '[\ł]',
96
- 'u' : '[\ú\ū\ứ\ů]',
97
- 'und' : '[\&]',
98
- 'r' : '[\ř]',
99
- 'y' : '[\ý]',
100
- 's' : '[\ś\š\ș\ş]',
101
- 'i' : '[\ī\ǐ\í\ï\î\ï]',
102
- 'z' : '[\ź\ž\ź\ż]',
103
- 'n' : '[\ñ\ń\ņ]',
104
- 'g' : '[\ğ]',
105
- 'ss' : '[\ß]',
106
- 't' : '[\ț\ť]',
107
- 'd' : '[\ď\đ]',
108
- "'": '[\ʿ\་\’\`\´\ʻ\`\‘]',
109
- 'p': '\р'
110
  }
111
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
112
 
113
  # Preprocessing the datasets.
114
  # We need to read the audio files as arrays
115
  def speech_file_to_array_fn(batch):
116
- batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
117
- for x in substitutions:
118
- batch["sentence"] = re.sub(substitutions[x], x, batch["sentence"])
119
- speech_array, sampling_rate = torchaudio.load(batch["path"])
120
- speech_array, sampling_rate = torchaudio.load(batch["path"])
121
- batch["speech"] = resampler(speech_array).squeeze().numpy()
122
- return batch
123
 
124
  test_dataset = test_dataset.map(speech_file_to_array_fn)
125
 
126
  # Preprocessing the datasets.
127
  # We need to read the audio files as arrays
128
  def evaluate(batch):
129
- inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
130
 
131
- with torch.no_grad():
132
- logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
133
 
134
- pred_ids = torch.argmax(logits, dim=-1)
135
- batch["pred_strings"] = processor.batch_decode(pred_ids)
136
- return batch
137
 
138
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
139
 
43
  test_dataset = load_dataset("common_voice", "de", split="test[:2%]")
44
 
45
  processor = Wav2Vec2Processor.from_pretrained("marcel/wav2vec2-large-xlsr-german-demo")
46
+ model = Wav2Vec2ForCTC.from_pretrained("marcel/wav2vec2-large-xlsr-german-demo")
47
 
48
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
49
 
79
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
80
  import re
81
 
82
+ test_dataset = load_dataset("common_voice", "de", split="test[:10%]")
83
  wer = load_metric("wer")
84
 
85
+ processor = Wav2Vec2Processor.from_pretrained("marcel/wav2vec2-large-xlsr-german-demo")
86
+ model = Wav2Vec2ForCTC.from_pretrained("marcel/wav2vec2-large-xlsr-german-demo")
87
  model.to("cuda")
88
 
89
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\”\�\カ\æ\無\ན\カ\臣\ѹ\…\«\»\ð\ı\„\幺\א\ב\比\ш\ע\)\ứ\в\œ\ч\+\—\ш\‚\נ\м\ń\乡\$\=\ש\ф\支\(\°\и\к\̇]'
90
  substitutions = {
91
+ 'e' : '[\ə\é\ě\ę\ê\ế\ế\ë\ė\е]',
92
+ 'o' : '[\ō\ô\ô\ó\ò\ø\ọ\ŏ\õ\ő\о]',
93
+ 'a' : '[\á\ā\ā\ă\ã\å\â\à\ą\а]',
94
+ 'c' : '[\č\ć\ç\с]',
95
+ 'l' : '[\ł]',
96
+ 'u' : '[\ú\ū\ứ\ů]',
97
+ 'und' : '[\&]',
98
+ 'r' : '[\ř]',
99
+ 'y' : '[\ý]',
100
+ 's' : '[\ś\š\ș\ş]',
101
+ 'i' : '[\ī\ǐ\í\ï\î\ï]',
102
+ 'z' : '[\ź\ž\ź\ż]',
103
+ 'n' : '[\ñ\ń\ņ]',
104
+ 'g' : '[\ğ]',
105
+ 'ss' : '[\ß]',
106
+ 't' : '[\ț\ť]',
107
+ 'd' : '[\ď\đ]',
108
+ "'": '[\ʿ\་\’\`\´\ʻ\`\‘]',
109
+ 'p': '\р'
110
  }
111
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
112
 
113
  # Preprocessing the datasets.
114
  # We need to read the audio files as arrays
115
  def speech_file_to_array_fn(batch):
116
+ batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
117
+ for x in substitutions:
118
+ batch["sentence"] = re.sub(substitutions[x], x, batch["sentence"])
119
+ speech_array, sampling_rate = torchaudio.load(batch["path"])
120
+ speech_array, sampling_rate = torchaudio.load(batch["path"])
121
+ batch["speech"] = resampler(speech_array).squeeze().numpy()
122
+ return batch
123
 
124
  test_dataset = test_dataset.map(speech_file_to_array_fn)
125
 
126
  # Preprocessing the datasets.
127
  # We need to read the audio files as arrays
128
  def evaluate(batch):
129
+ inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
130
 
131
+ with torch.no_grad():
132
+ logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
133
 
134
+ pred_ids = torch.argmax(logits, dim=-1)
135
+ batch["pred_strings"] = processor.batch_decode(pred_ids)
136
+ return batch
137
 
138
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
139