nithinholla committed
Commit 260efc1
1 Parent(s): d519f76

Updated model

Files changed (4):
  1. README.md +4 -4
  2. config.json +3 -3
  3. pytorch_model.bin +2 -2
  4. vocab.json +1 -1
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
 metrics:
 - name: Test WER
   type: wer
-  value: 22.05
+  value: 21.72
 ---
 
 # Wav2Vec2-Large-XLSR-53-Dutch
@@ -86,13 +86,13 @@ processor = Wav2Vec2Processor.from_pretrained("nithinholla/wav2vec2-large-xlsr-53-dutch")
 model = Wav2Vec2ForCTC.from_pretrained("nithinholla/wav2vec2-large-xlsr-53-dutch")
 model.to("cuda")
 
-chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)\&\–\—\—\…\´\’]'
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)\&\–\—\=\…]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
+    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower().replace("´", "'").replace("’", "'")
     speech_array, sampling_rate = torchaudio.load(batch["path"])
     batch["speech"] = resampler(speech_array).squeeze().numpy()
     return batch
@@ -116,7 +116,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
 
-**Test Result**: 22.05 %
+**Test Result**: 21.72 %
 
 
 ## Training
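The substance of this README change is the text normalization: the acute accent (´) and the right single quotation mark (’) are no longer deleted by the regex but are mapped to a plain ASCII apostrophe after lowercasing (the regex also drops a duplicated — and now removes =), and the reported test WER improves from 22.05 % to 21.72 %. A minimal sketch of the before/after cleaning behaviour, using a hypothetical input sentence that is not from the dataset:

```python
import re

# Regexes copied verbatim from the old and new model card (the \� is the
# Unicode replacement character, present in the original).
old_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)\&\–\—\—\…\´\’]'
new_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)\&\–\—\=\…]'

def clean_old(sentence):
    # Old behaviour: ´ and ’ are stripped outright.
    return re.sub(old_regex, '', sentence).lower()

def clean_new(sentence):
    # New behaviour: ´ and ’ survive the regex and are normalized to ',
    # which is a real token in the updated vocab.json.
    return re.sub(new_regex, '', sentence).lower().replace("´", "'").replace("’", "'")

sentence = "Zo’n avond!"          # hypothetical example sentence
print(clean_old(sentence))        # -> "zon avond"
print(clean_new(sentence))        # -> "zo'n avond"
```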
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
+  "_name_or_path": "/workspace/models/nl_third/checkpoint-13000",
   "activation_dropout": 0.1,
   "apply_spec_augment": true,
   "architectures": [
@@ -70,7 +70,7 @@
   "num_conv_pos_embeddings": 128,
   "num_feat_extract_layers": 7,
   "num_hidden_layers": 24,
-  "pad_token_id": 39,
+  "pad_token_id": 41,
   "transformers_version": "4.5.0.dev0",
-  "vocab_size": 40
+  "vocab_size": 42
 }
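The config changes track the new vocabulary: two entries were added (see vocab.json below), so the CTC head grows from 40 to 42 outputs, and the padding token, which Wav2Vec2 uses as the CTC blank, moves from index 39 to 41. A minimal consistency check, assuming both files have been downloaded into the working directory:

```python
import json

# Sketch: verify config.json agrees with vocab.json after this commit.
with open("vocab.json") as f:
    vocab = json.load(f)
with open("config.json") as f:
    config = json.load(f)

assert config["vocab_size"] == len(vocab)        # 42 entries after this commit
assert config["pad_token_id"] == vocab["[PAD]"]  # [PAD] sits at index 41
```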
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7af24042f0f1585a49c5952219c54d10f5eedb1880d5b2a9a1a4d0492568c0e
-size 1262097815
+oid sha256:0d66e36e390922ffe5ab48bcd36d637e94d3058955e987c62e41a04a276d368e
+size 1262106007
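The weight file grows by 8,192 bytes, which is consistent with the two new vocabulary entries: the fp32 CTC output matrix of the XLSR-53 large architecture (hidden size 1024) gains two rows. A rough back-of-the-envelope check (bias terms and pickle framing can shift the exact figure by a few bytes):

```python
# LFS sizes from this commit.
old_size, new_size = 1262097815, 1262106007

# Two extra rows in a (vocab_size, 1024) fp32 weight matrix.
extra_tokens, hidden_size, fp32_bytes = 2, 1024, 4
print(new_size - old_size)                      # 8192
print(extra_tokens * hidden_size * fp32_bytes)  # 8192
```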
vocab.json CHANGED
@@ -1 +1 @@
-{"t": 0, "q": 1, "b": 2, "r": 3, "p": 4, "ë": 5, "h": 6, "d": 7, "a": 8, "é": 10, "g": 11, "f": 12, "ü": 13, "i": 14, "n": 15, "x": 16, "á": 17, "ê": 18, "l": 19, "à": 20, "s": 21, "ó": 22, "w": 23, "u": 24, "k": 25, "j": 26, "y": 27, "è": 28, "z": 29, "m": 30, "v": 31, "e": 32, "ú": 33, "o": 34, "ï": 35, "c": 36, "ö": 37, "|": 9, "[UNK]": 38, "[PAD]": 39}
+{"x": 0, "n": 1, "u": 2, "b": 3, "p": 5, "g": 6, "ï": 7, "o": 8, "i": 9, "c": 10, "m": 11, "a": 12, "l": 13, "t": 14, "ü": 15, "é": 16, "á": 17, "e": 18, "r": 19, "f": 20, "w": 21, "´": 22, "v": 23, "ö": 24, "z": 25, "y": 26, "d": 27, "ó": 28, "h": 29, "s": 30, "q": 31, "k": 32, "'": 33, "ê": 34, "à": 35, "ë": 36, "è": 37, "j": 38, "ú": 39, "|": 4, "[UNK]": 40, "[PAD]": 41}