carlosdanielhernandezmena committed on
Commit
1ea6594
1 Parent(s): 478b2d8

Changing the variable sentence

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -148,7 +148,7 @@ ds=load_dataset("ciempiess/ciempiess_test", split="test")
148
  import re
149
  chars_to_ignore_regex = '[\\,\\?\\.\\!\\\;\\:\\"\\“\\%\\‘\\”\\�\\)\\(\\*)]'
150
  def remove_special_characters(batch):
151
- batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
152
  return batch
153
  ds = ds.map(remove_special_characters)
154
  #Downsample to 16kHz
@@ -159,7 +159,7 @@ def prepare_dataset(batch):
159
  #Batched output is "un-batched" to ensure mapping is correct
160
  batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
161
  with processor.as_target_processor():
162
- batch["labels"] = processor(batch["sentence"]).input_ids
163
  return batch
164
  ds = ds.map(prepare_dataset, remove_columns=ds.column_names,num_proc=1)
165
  #Define the evaluation metric
@@ -182,11 +182,11 @@ def map_to_result(batch):
182
  logits = model(input_values).logits
183
  pred_ids = torch.argmax(logits, dim=-1)
184
  batch["pred_str"] = processor.batch_decode(pred_ids)[0]
185
- batch["sentence"] = processor.decode(batch["labels"], group_tokens=False)
186
  return batch
187
  results = ds.map(map_to_result,remove_columns=ds.column_names)
188
  #Compute the overall WER now.
189
- print("Test WER: {:.3f}".format(wer_metric.compute(predictions=results["pred_str"], references=results["sentence"])))
190
 
191
  ```
192
  **Test Result**: 0.112
 
148
  import re
149
  chars_to_ignore_regex = '[\\,\\?\\.\\!\\\;\\:\\"\\“\\%\\‘\\”\\�\\)\\(\\*)]'
150
  def remove_special_characters(batch):
151
+ batch["normalized_text"] = re.sub(chars_to_ignore_regex, '', batch["normalized_text"]).lower()
152
  return batch
153
  ds = ds.map(remove_special_characters)
154
  #Downsample to 16kHz
 
159
  #Batched output is "un-batched" to ensure mapping is correct
160
  batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
161
  with processor.as_target_processor():
162
+ batch["labels"] = processor(batch["normalized_text"]).input_ids
163
  return batch
164
  ds = ds.map(prepare_dataset, remove_columns=ds.column_names,num_proc=1)
165
  #Define the evaluation metric
 
182
  logits = model(input_values).logits
183
  pred_ids = torch.argmax(logits, dim=-1)
184
  batch["pred_str"] = processor.batch_decode(pred_ids)[0]
185
+ batch["normalized_text"] = processor.decode(batch["labels"], group_tokens=False)
186
  return batch
187
  results = ds.map(map_to_result,remove_columns=ds.column_names)
188
  #Compute the overall WER now.
189
+ print("Test WER: {:.3f}".format(wer_metric.compute(predictions=results["pred_str"], references=results["normalized_text"])))
190
 
191
  ```
192
  **Test Result**: 0.112