gagan3012 committed
Commit
8680eca
1 Parent(s): 8f7c665

Update README.md

Files changed (1)
  1. README.md +16 -18
README.md CHANGED
@@ -52,15 +52,15 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the aduio files as arrays
 def speech_file_to_array_fn(batch):
- speech_array, sampling_rate = torchaudio.load(batch["path"])
- batch["speech"] = resampler(speech_array).squeeze().numpy()
- return batch
+ \tspeech_array, sampling_rate = torchaudio.load(batch["path"])
+ \tbatch["speech"] = resampler(speech_array).squeeze().numpy()
+ \treturn batch
 
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
 
 with torch.no_grad():
- logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
+ \tlogits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
 
 predicted_ids = torch.argmax(logits, dim=-1)
 
@@ -94,30 +94,30 @@ processor = Wav2Vec2Processor.from_pretrained("gagan3012/wav2vec2-xlsr-punjabi")
 model = Wav2Vec2ForCTC.from_pretrained("gagan3012/wav2vec2-xlsr-punjabi")
 model.to("cuda")
 
- chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“]' # TODO: adapt this list to include all special characters you removed from the data
+ chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“]' # TODO: adapt this list to include all special characters you removed from the data
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 
 # Preprocessing the datasets.
 # We need to read the aduio files as arrays
 def speech_file_to_array_fn(batch):
- \tbatch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
- \tspeech_array, sampling_rate = torchaudio.load(batch["path"])
- \tbatch["speech"] = resampler(speech_array).squeeze().numpy()
- \treturn batch
+ \\tbatch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
+ \\tspeech_array, sampling_rate = torchaudio.load(batch["path"])
+ \\tbatch["speech"] = resampler(speech_array).squeeze().numpy()
+ \\treturn batch
 
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 
 # Preprocessing the datasets.
 # We need to read the aduio files as arrays
 def evaluate(batch):
- \tinputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
+ \\tinputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
 
- \twith torch.no_grad():
- \t\tlogits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
+ \\twith torch.no_grad():
+ \\t\\tlogits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
 
- \tpred_ids = torch.argmax(logits, dim=-1)
- \tbatch["pred_strings"] = processor.batch_decode(pred_ids)
- \treturn batch
+ \\tpred_ids = torch.argmax(logits, dim=-1)
+ \\tbatch["pred_strings"] = processor.batch_decode(pred_ids)
+ \\treturn batch
 
 result = test_dataset.map(evaluate, batched=True, batch_size=8)
 
@@ -129,6 +129,4 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"],
 
 ## Training
 
- The Common Voice `train`, `validation`, and ... datasets were used for training as well as ... and ... # TODO: adapt to state all the datasets that were used for training.
-
- The script used for training can be found [here](...) # TODO: fill in a link to your training script here. If you trained your model in a colab, simply fill in the link here. If you trained the model locally, it would be great if you could upload the training script on github and paste the link here.
+ The script used for training can be found [here](https://colab.research.google.com/drive/1A7Y20c1QkSHfdOmLXPMiOEpwlTjDZ7m5?usp=sharing)
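
For convenience, here is a minimal, self-contained sketch of the evaluation snippet as it reads after this commit, with the escaped tabs and backslashes written out as ordinary Python. The Common Voice config name `pa-IN` and the `load_metric("wer")` call are assumptions taken from the standard XLSR model-card template, not from this diff.

```python
# Hypothetical end-to-end version of the evaluation code shown in this diff.
# Assumptions (not taken from the diff): the Common Voice config name "pa-IN"
# and wer = load_metric("wer") follow the standard XLSR model-card template.
import re

import torch
import torchaudio
from datasets import load_dataset, load_metric
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

test_dataset = load_dataset("common_voice", "pa-IN", split="test")
wer = load_metric("wer")

processor = Wav2Vec2Processor.from_pretrained("gagan3012/wav2vec2-xlsr-punjabi")
model = Wav2Vec2ForCTC.from_pretrained("gagan3012/wav2vec2-xlsr-punjabi")
model.to("cuda")  # the model card runs evaluation on a CUDA device

# Punctuation stripped from the reference transcripts before scoring.
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“]'

# Common Voice clips are 48 kHz; the model expects 16 kHz input.
resampler = torchaudio.transforms.Resample(48_000, 16_000)

def speech_file_to_array_fn(batch):
    # Normalize the transcript and read the audio file as a 16 kHz array.
    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    batch["speech"] = resampler(speech_array).squeeze().numpy()
    return batch

test_dataset = test_dataset.map(speech_file_to_array_fn)

def evaluate(batch):
    # Batched greedy (argmax) CTC decoding on the GPU.
    inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_strings"] = processor.batch_decode(pred_ids)
    return batch

result = test_dataset.map(evaluate, batched=True, batch_size=8)

print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
```

Decoding here is greedy: `torch.argmax` picks the most likely token per frame, and `processor.batch_decode` collapses repeated tokens and removes CTC blanks to produce the predicted transcriptions.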